Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/nodes.py: 62%

1# $Id$

2# Author: David Goodger <goodger@python.org>

3# Maintainer: docutils-develop@lists.sourceforge.net

4# Copyright: This module has been placed in the public domain.

6"""

7Docutils document tree element class library.

9The relationships and semantics of elements and attributes are documented in

10`The Docutils Document Tree`__.

12Classes in CamelCase are abstract base classes or auxiliary classes. The one

13exception is `Text`, for a text (PCDATA) node; uppercase is used to

14differentiate from element classes. Classes in lower_case_with_underscores

15are element classes, matching the XML element generic identifiers in the DTD_.

17The position of each node (the level at which it can occur) is significant and

18is represented by abstract base classes (`Root`, `Structural`, `Body`,

19`Inline`, etc.). Certain transformations will be easier because we can use

20``isinstance(node, base_class)`` to determine the position of the node in the

21hierarchy.

23__ https://docutils.sourceforge.io/docs/ref/doctree.html

24.. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd

25"""

27from __future__ import annotations

29__docformat__ = 'reStructuredText'

31import os

32import re

33import sys

34import unicodedata

35import warnings

36from collections import Counter

37# import xml.dom.minidom as dom # -> conditional import in Node.asdom()

38# and document.asdom()

40# import docutils.transforms # -> delayed import in document.__init__()

42TYPE_CHECKING = False

43if TYPE_CHECKING:

44 from collections.abc import (Callable, Iterable, Iterator,

45 Mapping, Sequence)

46 from types import ModuleType

47 from typing import Any, ClassVar, Final, Literal, Self, SupportsIndex

49 from docutils.utils._typing import TypeAlias

51 from xml.dom import minidom

53 from docutils.frontend import Values

54 from docutils.transforms import Transformer, Transform

55 from docutils.utils import Reporter

57 _ContentModelCategory: TypeAlias = tuple['Element' | tuple['Element', ...]]

58 _ContentModelQuantifier = Literal['.', '?', '+', '*']

59 _ContentModelItem: TypeAlias = tuple[_ContentModelCategory,

60 _ContentModelQuantifier]

61 _ContentModelTuple: TypeAlias = tuple[_ContentModelItem, ...]

63 StrPath: TypeAlias = str | os.PathLike[str]

64 """File system path. No bytes!"""

66 _UpdateFun: TypeAlias = Callable[[str, Any, bool], None]

69# ==============================

70# Functional Node Base Classes

71# ==============================

class Node:
    """Abstract base class of nodes in a document tree."""

    parent: Element | None = None
    """Back-reference to the Node immediately containing this Node."""

    children: Sequence  # defined in subclasses
    """List of child nodes (Elements or Text).

    Override in subclass instances that are not terminal nodes.
    """

    source: StrPath | None = None
    """Path or description of the input source which generated this Node."""

    line: int | None = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    tagname: str  # defined in subclasses
    """The element generic identifier."""

    _document: document | None = None

    @property
    def document(self) -> document | None:
        """Return the `document` root node of the tree containing this Node.

        Use the value stored on this node if there is one, else ask the
        parent. Return None if no node up the parent chain provides one.
        """
        try:
            return self._document or self.parent.document
        except AttributeError:
            # `self.parent` is None: there is nobody left to ask.
            return None

    @document.setter
    def document(self, value: document) -> None:
        self._document = value

    def __bool__(self) -> Literal[True]:
        """
        Node instances are always true, even if they're empty.  A node is more
        than a simple container.  Its boolean "truth" does not depend on
        having one or more subnodes in the doctree.

        Use `len()` to check node length.
        """
        return True

    def asdom(self,
              dom: ModuleType | None = None,
              ) -> minidom.Document | minidom.Element | minidom.Text:
        # TODO: minidom.Document is only returned by document.asdom()
        #       (which overwrites this base-class implementation)
        """Return a DOM **fragment** representation of this Node.

        Parameter `dom`: module providing a ``Document`` class; defaults to
        `xml.dom.minidom` (imported on demand).
        """
        if dom is None:
            import xml.dom.minidom as dom
        domroot = dom.Document()
        return self._dom_node(domroot)

    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """
        Return an indented pseudo-XML representation, for test purposes.

        Override in subclasses.
        """
        raise NotImplementedError

    def copy(self) -> Self:
        """Return a copy of self."""
        raise NotImplementedError

    def deepcopy(self) -> Self:
        """Return a deep copy of self (also copying children)."""
        raise NotImplementedError

    def astext(self) -> str:
        """Return a string representation of this Node."""
        raise NotImplementedError

    def setup_child(self, child) -> None:
        """Register `self` as parent of `child` and pass on document info.

        If `self` belongs to a document, `child` gets the same document and,
        where `child.source` or `child.line` are None, the document's
        `current_source` / `current_line`.
        """
        child.parent = self
        # The `document` property may walk up the parent chain on every
        # access, so look it up only once.
        document = self.document
        if document:
            child.document = document
            if child.source is None:
                child.source = document.current_source
            if child.line is None:
                child.line = document.current_line

    def walk(self, visitor: NodeVisitor) -> bool:
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (The `walkabout()` method is similar, except it also
        calls the `dispatch_departure()` method before exiting each
        node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit`` methods (and ``depart`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        stop = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return stop
            except SkipDeparture:  # not applicable; ignore
                pass
            children = self.children
            try:
                # Iterate over a copy: visitors may modify the child list.
                for child in children[:]:
                    if child.walk(visitor):
                        stop = True
                        break
            except SkipSiblings:
                pass
        except StopTraversal:
            stop = True
        return stop

    def walkabout(self, visitor: NodeVisitor) -> bool:
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        call_depart = True
        stop = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                return stop
            except SkipDeparture:
                call_depart = False
            children = self.children
            try:
                # Iterate over a copy: visitors may modify the child list.
                for child in children[:]:
                    if child.walkabout(visitor):
                        stop = True
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            stop = True
        if call_depart:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return stop

    def _fast_findall(self, cls: type|tuple[type]) -> Iterator:
        """Return iterator that only supports instance checks."""
        if isinstance(self, cls):
            yield self
        for child in self.children:
            yield from child._fast_findall(cls)

    def _superfast_findall(self) -> Iterator:
        """Return iterator that doesn't check for a condition."""
        # This is different from ``iter(self)`` implemented via
        # __getitem__() and __len__() in the Element subclass,
        # which yields only the direct children.
        yield self
        for child in self.children:
            yield from child._superfast_findall()

    def findall(self,
                condition: type|tuple[type]|Callable[[Node], bool]|None = None,
                include_self: bool = True,
                descend: bool = True,
                siblings: bool = False,
                ascend: bool = False,
                ) -> Iterator:
        """
        Return an iterator yielding nodes following `self`:

        * self (if `include_self` is true)
        * all descendants in tree traversal order (if `descend` is true)
        * the following siblings (if `siblings` is true) and their
          descendants (if also `descend` is true)
        * the following siblings of the parent (if `ascend` is true) and
          their descendants (if also `descend` is true), and so on.

        If `condition` is not None, the iterator yields only nodes
        for which ``condition(node)`` is true.
        If `condition` is a type (or tuple of types) ``cls``, it is equivalent
        to a function consisting of ``return isinstance(node, cls)``.

        If `ascend` is true, assume `siblings` to be true as well.

        If the tree structure is modified during iteration, the result
        is undefined.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.findall() and
                    <strong>    <--- strong.findall() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then tuple(emphasis.findall()) equals ::

            (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

        and list(strong.findall(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        # Check for special argument combinations that allow using an
        # optimized version of findall()
        if include_self and descend and not siblings:
            if condition is None:
                yield from self._superfast_findall()
                return
            elif isinstance(condition, (type, tuple)):
                yield from self._fast_findall(condition)
                return
        # Wrap a class (or tuple of classes) in an instance-check function
        # so that the remaining code can treat `condition` as a callable.
        if isinstance(condition, (type, tuple)):
            class_or_tuple = condition

            def condition(node, class_or_tuple=class_or_tuple):
                return isinstance(node, class_or_tuple)

        if include_self and (condition is None or condition(self)):
            yield self
        if descend and len(self.children):
            for child in self:
                yield from child.findall(condition=condition,
                                         include_self=True, descend=True,
                                         siblings=False, ascend=False)
        if siblings or ascend:
            node = self
            while node.parent:
                index = node.parent.index(node)
                # extra check since Text nodes have value-equality
                while node.parent[index] is not node:
                    index = node.parent.index(node, index + 1)
                for sibling in node.parent[index+1:]:
                    yield from sibling.findall(
                        condition=condition,
                        include_self=True, descend=descend,
                        siblings=False, ascend=False)
                if not ascend:
                    break
                else:
                    node = node.parent

    def traverse(
        self,
        condition: type|tuple[type]|Callable[[Node], bool]|None = None,
        include_self: bool = True,
        descend: bool = True,
        siblings: bool = False,
        ascend: bool = False,
    ) -> list:
        """Return list of nodes following `self`.

        For looping, Node.findall() is faster and more memory efficient.
        """
        # traverse() may be eventually removed:
        warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',
                      DeprecationWarning, stacklevel=2)
        return list(self.findall(condition, include_self, descend,
                                 siblings, ascend))

    def next_node(
        self,
        condition: type|tuple[type]|Callable[[Node], bool]|None = None,
        include_self: bool = False,
        descend: bool = True,
        siblings: bool = False,
        ascend: bool = False,
    ) -> Node | None:
        """
        Return the first node in the iterator returned by findall(),
        or None if the iterable is empty.

        Parameter list is the same as of `findall()`.  Note that `include_self`
        defaults to False, though.
        """
        try:
            return next(self.findall(condition, include_self,
                                     descend, siblings, ascend))
        except StopIteration:
            return None

    def validate(self, recursive: bool = True) -> None:
        """Raise ValidationError if this node is not valid.

        Override in subclasses that define validity constraints.
        """

    def validate_position(self) -> None:
        """Hook for additional checks of the parent's content model.

        Raise ValidationError, if `self` is at an invalid position.

        Override in subclasses with complex validity constraints. See
        `subtitle.validate_position()` and `transition.validate_position()`.
        """

405

406

class Text(Node, str):  # NoQA: SLOT000 (Node doesn't define __slots__)
    """
    Instances are terminal nodes (leaves) containing text only; no child
    nodes or attributes.  Initialize by passing a string to the constructor.

    Access the raw (null-escaped) text with ``str(<instance>)``
    and unescaped text with ``<instance>.astext()``.
    """

    tagname: Final = '#text'

    children: Final = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data: str, rawsource: None = None) -> Self:
        """Create the instance from `data`.

        Raise TypeError if `data` is a bytes instance and emit a
        DeprecationWarning if the deprecated `rawsource` argument is used.
        """
        if isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        if rawsource is not None:
            warnings.warn('nodes.Text: initialization argument "rawsource" '
                          'is ignored and will be removed in Docutils 2.0.',
                          DeprecationWarning, stacklevel=2)
        return str.__new__(cls, data)

    def shortrepr(self, maxlen: int = 18) -> str:
        """Return ``<#text: '...'>``, abbreviating text longer than `maxlen`."""
        text = str(self)
        if len(text) > maxlen:
            text = text[:maxlen-4] + ' ...'
        return '<%s: %r>' % (self.tagname, text)

    def __repr__(self) -> str:
        return self.shortrepr(maxlen=68)

    def astext(self) -> str:
        """Return the text after passing it through `unescape()`."""
        return str(unescape(self))

    def _dom_node(self, domroot: minidom.Document) -> minidom.Text:
        return domroot.createTextNode(str(self))

    def copy(self) -> Self:
        """Return a new instance of the same class with the same text."""
        return self.__class__(str(self))

    def deepcopy(self) -> Self:
        """Same as `copy()` (there are no children to copy)."""
        return self.copy()

    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """Return the text as indented lines, for test purposes.

        If ``self.document.settings.detailed`` is true, return a ``<#text>``
        tag line followed by the `repr()` of every line instead.
        """
        try:
            if self.document.settings.detailed:
                head = '%s%s' % (indent*level, '<#text>')
                body = [indent*(level+1) + repr(chunk)
                        for chunk in self.splitlines(True)]
                return '\n'.join([head, *body]) + '\n'
        except AttributeError:
            # no document or no "detailed" setting: use the plain format
            pass
        prefix = indent * level
        rows = [prefix + row for row in self.astext().splitlines()]
        if rows:
            return '\n'.join(rows) + '\n'
        return ''

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString.

    def rstrip(self, chars: str | None = None) -> Self:
        stripped = str.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars: str | None = None) -> Self:
        stripped = str.lstrip(self, chars)
        return self.__class__(stripped)

478

479

480class Element(Node):

481 """

482 `Element` is the superclass to all specific elements.

483

484 Elements contain attributes and child nodes.

485 They can be described as a cross between a list and a dictionary.

486

487 Elements emulate dictionaries for external [#]_ attributes, indexing by

488 attribute name (a string). To set the attribute 'att' to 'value', do::

489

490 element['att'] = 'value'

491

492 .. [#] External attributes correspond to the XML element attributes.

493 From its `Node` superclass, Element also inherits "internal"

494 class attributes that are accessed using the standard syntax, e.g.

495 ``element.parent``.

496

497 There are two special attributes: 'ids' and 'names'. Both are

498 lists of unique identifiers: 'ids' conform to the regular expression

499 ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and

500 details). 'names' serve as user-friendly interfaces to IDs; they are

501 case- and whitespace-normalized (see the fully_normalize_name() function).

502

503 Elements emulate lists for child nodes (element nodes and/or text

504 nodes), indexing by integer. To get the first child node, use::

505

506 element[0]

507

508 to iterate over the child nodes (without descending), use::

509

510 for child in element:

511 ...

512

513 Elements may be constructed using the ``+=`` operator. To add one new

514 child node to element, do::

515

516 element += node

517

518 This is equivalent to ``element.append(node)``.

519

520 To add a list of multiple child nodes at once, use the same ``+=``

521 operator::

522

523 element += [node1, node2]

524

525 This is equivalent to ``element.extend([node1, node2])``.

526 """

527

528 list_attributes: Final = ('ids', 'classes', 'names', 'dupnames')

529 """Tuple of attributes that are initialized to empty lists.

530

531 NOTE: Derived classes should update this value when supporting

532 additional list attributes.

533 """

534

535 valid_attributes: Final = list_attributes + ('source',)

536 """Tuple of attributes that are valid for elements of this class.

537

538 NOTE: Derived classes should update this value when supporting

539 additional attributes.

540 """

541

542 common_attributes: Final = valid_attributes

543 """Tuple of `common attributes`__ known to all Doctree Element classes.

544

545 __ https://docutils.sourceforge.io/docs/ref/doctree.html#common-attributes

546 """

547

548 known_attributes: Final = common_attributes

549 """Alias for `common_attributes`. Will be removed in Docutils 2.0."""

550

551 basic_attributes: Final = list_attributes

552 """Common list attributes. Deprecated. Will be removed in Docutils 2.0."""

553

554 local_attributes: Final = ('backrefs',)

555 """Obsolete. Will be removed in Docutils 2.0."""

556

557 content_model: ClassVar[_ContentModelTuple] = ()

558 """Python representation of the element's content model (cf. docutils.dtd).

559

560 A tuple of ``(category, quantifier)`` tuples with

561

562 :category: class or tuple of classes that are expected at this place(s)

563 in the list of children

564 :quantifier: string representation stating how many elements

565 of `category` are expected. Value is one of:

566 '.' (exactly one), '?' (zero or one),

567 '+' (one or more), '*' (zero or more).

568

569 NOTE: The default describes the empty element. Derived classes should

570 update this value to match their content model.

571

572 Provisional.

573 """

574

575 tagname: str | None = None

576 """The element generic identifier.

577

578 If None, it is set as an instance attribute to the name of the class.

579 """

580

581 child_text_separator: Final = '\n\n'

582 """Separator for child nodes, used by `astext()` method."""

583

def __init__(self,
             rawsource: str = '',
             *children,
             **attributes: Any,
             ) -> None:
    """Initialize raw source, child nodes and attributes.

    Parameter `rawsource`: text the element was constructed from.
    Parameter `children`: nodes appended as children (via `extend()`).
    Parameter `attributes`: initial attributes; names are lowercased.

    Raise TypeError if `rawsource` is an `Element` instance.
    """
    self.rawsource = rawsource
    """The raw text from which this element was constructed.

    For informative and debugging purposes. Don't rely on its value!

    NOTE: some elements do not set this value (default '').
    """
    if isinstance(rawsource, Element):
        raise TypeError('First argument "rawsource" must be a string.')

    self.children: list = []
    """List of child nodes (elements and/or `Text`)."""

    self.extend(children)  # maintain parent info

    # Every list attribute starts out as a separate empty list.
    self.attributes: dict[str, Any] = {
        name: [] for name in self.list_attributes}
    """Dictionary of attribute {name: value}."""

    for name, value in attributes.items():
        name = name.lower()  # normalize attribute name
        if name in self.list_attributes:
            # lists are mutable; make a copy for this node
            value = value[:]
        self.attributes[name] = value

    if self.tagname is None:
        self.tagname: str = self.__class__.__name__

621

622 def _dom_node(self, domroot: minidom.Document) -> minidom.Element:

623 element = domroot.createElement(self.tagname)

624 for attribute, value in self.attlist():

625 if isinstance(value, list):

626 value = ' '.join(serial_escape('%s' % (v,)) for v in value)

627 element.setAttribute(attribute, '%s' % value)

628 for child in self.children:

629 element.appendChild(child._dom_node(domroot))

630 return element

631

632 def __repr__(self) -> str:

633 data = ''

634 for c in self.children:

635 data += c.shortrepr()

636 if len(data) > 60:

637 data = data[:56] + ' ...'

638 break

639 if self['names']:

640 return '<%s "%s": %s>' % (self.tagname,

641 '; '.join(self['names']), data)

642 else:

643 return '<%s: %s>' % (self.tagname, data)

644

def shortrepr(self) -> str:
    """Return an abbreviated representation: tag name and names only."""
    names = self['names']
    if not names:
        return '<%s...>' % self.tagname
    return '<%s "%s"...>' % (self.tagname, '; '.join(names))

650

651 def __str__(self) -> str:

652 if self.children:

653 return '%s%s%s' % (self.starttag(),

654 ''.join(str(c) for c in self.children),

655 self.endtag())

656 else:

657 return self.emptytag()

658

def starttag(self, quoteattr: Callable[[str], str] | None = None) -> str:
    """Return the start tag with all attributes from `attlist()`.

    Parameter `quoteattr`: function used to quote attribute values
    (the docutils_xml writer passes its own); defaults to
    `pseudo_quoteattr`.

    A value of None is written as ``name="True"``, booleans as 0 or 1,
    and lists as space-separated items, each escaped with
    `serial_escape()`.
    """
    if quoteattr is None:
        quoteattr = pseudo_quoteattr
    parts = [self.tagname]
    for name, value in self.attlist():
        if value is None:  # boolean attribute
            parts.append('%s="True"' % name)
            continue
        if isinstance(value, list):
            text = ' '.join(serial_escape('%s' % (item,)) for item in value)
        elif isinstance(value, bool):
            text = str(int(value))
        else:
            text = str(value)
        parts.append('%s=%s' % (name, quoteattr(text)))
    return '<%s>' % ' '.join(parts)

678

def endtag(self) -> str:
    """Return the end tag, ``</tagname>``."""
    name = self.tagname
    return '</%s>' % name

681

def emptytag(self) -> str:
    """Return a self-closing tag with all attributes from `attlist()`.

    Values are inserted with plain ``%s`` formatting between double
    quotes (no quoting function is applied).
    """
    parts = [self.tagname]
    for name, value in self.attlist():
        parts.append('%s="%s"' % (name, value))
    return '<%s/>' % ' '.join(parts)

685

686 def __len__(self) -> int:

687 return len(self.children)

688

689 def __contains__(self, key) -> bool:

690 # Test for both, children and attributes with operator ``in``.

691 if isinstance(key, str):

692 return key in self.attributes

693 return key in self.children

694

695 def __getitem__(self, key: str | int | slice) -> Any:

696 if isinstance(key, str):

697 return self.attributes[key]

698 elif isinstance(key, int):

699 return self.children[key]

700 elif isinstance(key, slice):

701 assert key.step in (None, 1), 'cannot handle slice with stride'

702 return self.children[key.start:key.stop]

703 else:

704 raise TypeError('element index must be an integer, a slice, or '

705 'an attribute name string')

706

707 def __setitem__(self, key, item) -> None:

708 if isinstance(key, str):

709 self.attributes[str(key)] = item

710 elif isinstance(key, int):

711 self.setup_child(item)

712 self.children[key] = item

713 elif isinstance(key, slice):

714 assert key.step in (None, 1), 'cannot handle slice with stride'

715 for node in item:

716 self.setup_child(node)

717 self.children[key.start:key.stop] = item

718 else:

719 raise TypeError('element index must be an integer, a slice, or '

720 'an attribute name string')

721

722 def __delitem__(self, key: str | int | slice) -> None:

723 if isinstance(key, str):

724 del self.attributes[key]

725 elif isinstance(key, int):

726 del self.children[key]

727 elif isinstance(key, slice):

728 assert key.step in (None, 1), 'cannot handle slice with stride'

729 del self.children[key.start:key.stop]

730 else:

731 raise TypeError('element index must be an integer, a simple '

732 'slice, or an attribute name string')

733

734 def __add__(self, other: list) -> list:

735 return self.children + other

736

737 def __radd__(self, other: list) -> list:

738 return other + self.children

739

def __iadd__(self, other) -> Self:
    """Append a node or a list of nodes to `self.children`.

    A `Node` instance is appended, any other value except None is
    passed to `extend()`.  Return `self`.
    """
    if isinstance(other, Node):
        self.append(other)
        return self
    if other is not None:
        self.extend(other)
    return self

747

def astext(self) -> str:
    """Return the children's text, joined with `child_text_separator`."""
    texts = [child.astext() for child in self.children]
    return self.child_text_separator.join(texts)

751

def non_default_attributes(self) -> dict[str, Any]:
    """Return a new dict of the attributes for which `is_not_default()` holds."""
    selected = {}
    for name, value in self.attributes.items():
        if self.is_not_default(name):
            selected[name] = value
    return selected

756

def attlist(self) -> list[tuple[str, Any]]:
    """Return the sorted (name, value) pairs of `non_default_attributes()`."""
    pairs = list(self.non_default_attributes().items())
    pairs.sort()
    return pairs

759

def get(self, key: str, failobj: Any | None = None) -> Any:
    """Return the value of attribute `key`, or `failobj` if it is not set."""
    attributes = self.attributes
    return attributes.get(key, failobj)

762

def hasattr(self, attr: str) -> bool:
    """Return True if the attribute `attr` is set on this element."""
    attributes = self.attributes
    return attr in attributes

765

def delattr(self, attr: str) -> None:
    """Remove the attribute `attr`; do nothing if it is not set."""
    if attr not in self.attributes:
        return
    del self.attributes[attr]

769

def setdefault(self, key: str, failobj: Any | None = None) -> Any:
    """Return attribute `key`, setting it to `failobj` first if it is missing."""
    attributes = self.attributes
    return attributes.setdefault(key, failobj)

772

773 has_key = hasattr

774

def get_language_code(self, fallback: str = '') -> str:
    """Return node's language tag.

    Look in self and then in the parents for a class argument
    starting with ``language-`` and return the remainder of it
    (which should be a `BCP 47` language tag) or the `fallback`.
    """
    prefix = 'language-'
    for class_value in self.get('classes', []):
        if class_value.startswith(prefix):
            return class_value.removeprefix(prefix)
    try:
        return self.parent.get_language_code(fallback)
    except AttributeError:
        # e.g. `self.parent` is None: nowhere else to look
        return fallback

789

def append(self, item) -> None:
    """Add `item` as last child node, after passing it to `setup_child()`."""
    self.setup_child(item)
    child_nodes = self.children
    child_nodes.append(item)

793

def extend(self, item: Iterable) -> None:
    """Append every node of the iterable `item` with `append()`."""
    add = self.append
    for new_child in item:
        add(new_child)

797

def insert(self, index: SupportsIndex, item) -> None:
    """Insert `item` before position `index` in the list of children.

    A `Node` instance is inserted as a single child; any other value
    except None is inserted via slice assignment.  None is ignored.
    """
    if isinstance(item, Node):
        self.setup_child(item)
        self.children.insert(index, item)
        return
    if item is not None:
        self[index:index] = item

804

def pop(self, i: int = -1):
    """Remove and return the child node at index `i` (default: the last)."""
    child_nodes = self.children
    return child_nodes.pop(i)

807

def remove(self, item) -> None:
    """Remove the first child node equal to `item` (see `list.remove()`)."""
    child_nodes = self.children
    child_nodes.remove(item)

810

def index(self, item, start: int = 0, stop: int = sys.maxsize) -> int:
    """Return the index of the first child equal to `item` (see `list.index()`).

    The search is limited to the positions from `start` up to `stop`.
    """
    child_nodes = self.children
    return child_nodes.index(item, start, stop)

813

def previous_sibling(self):
    """Return preceding sibling node or ``None``.

    None is also returned if looking up the position in the parent
    raises AttributeError (e.g. when there is no parent).
    """
    try:
        position = self.parent.index(self)
    except AttributeError:
        return None
    if position > 0:
        return self.parent[position - 1]
    return None

821

def section_hierarchy(self) -> list[section]:
    """Return the element's section ancestors.

    Return a list of all <section> elements that contain `self`
    (including `self` if it is a <section>) and have a parent node.

    List item ``[i]`` is the parent <section> of level i+1
    (1: section, 2: subsection, 3: subsubsection, ...).
    The length of the list is the element's section level.

    See `docutils.parsers.rst.states.RSTState.check_subsection()`
    for a usage example.

    Provisional. May be changed or removed without warning.
    """
    found = []
    current = self
    # Collect from the inside out; stop at the node without parent.
    while current.parent is not None:
        if isinstance(current, section):
            found.append(current)
        current = current.parent
    return found[::-1]

845

def is_not_default(self, key: str) -> bool:
    """Return False if `key` is a list attribute holding an empty list.

    Return True in all other cases.
    """
    is_default = self[key] == [] and key in self.list_attributes
    return not is_default

851

def update_basic_atts(self, dict_: Mapping[str, Any] | Element) -> None:
    """
    Update basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') from node or dictionary `dict_`.

    Values not yet present are appended with `append_attr_list()`.

    Provisional.
    """
    source = dict_.attributes if isinstance(dict_, Node) else dict_
    for name in self.basic_attributes:
        self.append_attr_list(name, source.get(name, []))

863

def append_attr_list(self, attr: str, values: Iterable[Any]) -> None:
    """
    Append each item of `values` to self[attr] unless it is already there.

    NOTE: Requires self[attr] and values to be sequence type and the
    former should specifically be a list.
    """
    # List Concatenation
    for candidate in values:
        if candidate in self[attr]:
            continue
        self[attr].append(candidate)

876

def coerce_append_attr_list(
        self, attr: str, value: list[Any] | Any) -> None:
    """
    Wrap self[attr] and `value` in a one-element list each, unless they
    already are lists.  Then call append_attr_list.

    NOTE: self[attr] and value both must not be None.
    """
    # List Concatenation
    current = self.get(attr)
    if not isinstance(current, list):
        self[attr] = [self[attr]]
    new_values = value if isinstance(value, list) else [value]
    self.append_attr_list(attr, new_values)

892

def replace_attr(self, attr: str, value: Any, force: bool = True) -> None:
    """
    Set self[attr] to `value` if `force` is true (the default) or if
    ``self.get(attr)`` is None.  Otherwise do nothing.
    """
    # One or the other
    if not force and self.get(attr) is not None:
        return
    self[attr] = value

901

def copy_attr_convert(
        self, attr: str, value: Any, replace: bool = True) -> None:
    """
    If attr is an attribute of self, set self[attr] to
    [self[attr], value], otherwise set self[attr] to value.

    Nothing is done if ``self.get(attr)`` already is the very same object
    as `value`.

    NOTE: replace is not used by this function and is kept only for
          compatibility with the other copy functions.
    """
    if self.get(attr) is value:
        return
    if attr not in self.attributes:
        # Nothing to merge with.  (Passing this case on to
        # coerce_append_attr_list() would fail with a KeyError
        # when it reads the missing self[attr].)
        self[attr] = value
    else:
        self.coerce_append_attr_list(attr, value)

913

def copy_attr_coerce(self, attr: str, value: Any, replace: bool) -> None:
    """
    If attr is an attribute of self and either self[attr] or value is a
    list, convert all non-sequence values to a sequence of 1 element and
    then concatenate the two sequence, setting the result to self[attr].
    If both self[attr] and value are non-sequences and replace is True or
    self[attr] is None, replace self[attr] with value. Otherwise, do
    nothing.
    """
    current = self.get(attr)
    if current is value:
        return
    if isinstance(current, list) or isinstance(value, list):
        self.coerce_append_attr_list(attr, value)
    else:
        self.replace_attr(attr, value, replace)

929

def copy_attr_concatenate(
        self, attr: str, value: Any, replace: bool) -> None:
    """
    If attr is an attribute of self and both self[attr] and value are
    lists, concatenate the two sequences, setting the result to
    self[attr].  If either self[attr] or value are non-sequences and
    replace is True or self[attr] is None, replace self[attr] with value.
    Otherwise, do nothing.
    """
    current = self.get(attr)
    if current is value:
        return
    both_lists = isinstance(current, list) and isinstance(value, list)
    if both_lists:
        self.append_attr_list(attr, value)
    else:
        self.replace_attr(attr, value, replace)

945

def copy_attr_consistent(
        self, attr: str, value: Any, replace: bool) -> None:
    """
    If replace is True or self[attr] is None, replace self[attr] with
    value.  Otherwise, do nothing.

    Nothing is done either if ``self.get(attr)`` already is the very
    same object as `value`.
    """
    if self.get(attr) is value:
        return
    self.replace_attr(attr, value, replace)

954

def update_all_atts(self,
                    dict_: Mapping[str, Any] | Element,
                    update_fun: _UpdateFun = copy_attr_consistent,
                    replace: bool = True,
                    and_source: bool = False,
                    ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_, the two values are
    merged based on the value of update_fun.  Generally, when replace is
    True, the values in self are replaced or merged with the values in
    dict_; otherwise, the values in self may be preserved or merged.  When
    and_source is True, the 'source' attribute is included in the copy.

    `update_fun` is called as ``update_fun(self, att, value, replace)``
    for every attribute of `dict_` that passes the attribute filter.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list depending
          on the value of update_fun.
    NOTE: It is easier to call the update-specific methods than to pass
          the update_fun method to this function.
    """
    # A node contributes its attribute dictionary.
    source_atts = dict_.attributes if isinstance(dict_, Node) else dict_

    # Include the source attribute when copying?
    # With `and_source`, only the list attributes are filtered out;
    # without it, every common attribute is filtered out.
    wanted = (self.is_not_list_attribute if and_source
              else self.is_not_known_attribute)

    # Copy the basic attributes
    self.update_basic_atts(source_atts)

    # Update the remaining attributes of dict_ that pass the filter
    # (all basic attributes should be copied already).
    for att in source_atts:
        if wanted(att):
            update_fun(self, att, source_atts[att], replace)

996

def update_all_atts_consistantly(self,
                                 dict_: Mapping[str, Any] | Element,
                                 replace: bool = True,
                                 and_source: bool = False,
                                 ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_consistent`
    as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ and replace is True, the
    values in self are replaced with the values in dict_; otherwise, the
    values in self are preserved.  When and_source is True, the 'source'
    attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_consistent,
                         replace=replace,
                         and_source=and_source)

1020

def update_all_atts_concatenating(self,
                                  dict_: Mapping[str, Any] | Element,
                                  replace: bool = True,
                                  and_source: bool = False,
                                  ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_concatenate`
    as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_:

    * if both values are lists, the two lists are concatenated and the
      result is stored in self;
    * otherwise, if replace is True, the value in self is replaced with
      the value in dict_;
    * otherwise, the value in self is preserved.

    When and_source is True, the 'source' attribute is included in the
    copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though two list values would still be concatenated.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_concatenate,
                         replace=replace,
                         and_source=and_source)

1047

def update_all_atts_coercion(self,
                             dict_: Mapping[str, Any] | Element,
                             replace: bool = True,
                             and_source: bool = False,
                             ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_coerce`
    as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_:

    * if either value is a list, any non-list is first converted to a
      1-element list, then the two lists are concatenated and the result
      is stored in self;
    * otherwise, if replace is True, the value in self is replaced with
      the value in dict_;
    * otherwise, the value in self is preserved.

    When and_source is True, the 'source' attribute is included in the
    copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list if one of
          the two values is a list.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_coerce,
                         replace=replace,
                         and_source=and_source)

1075

def update_all_atts_convert(self,
                            dict_: Mapping[str, Any] | Element,
                            and_source: bool = False,
                            ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_convert`
    as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ then first any non-lists
    are converted to 1-element lists and then the two lists are
    concatenated and the result stored in self.  When and_source is True,
    the 'source' attribute is included in the copy.

    NOTE: This method has no `replace` argument; `copy_attr_convert()`
          ignores it, so values are always merged, never replaced.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_convert,
                         and_source=and_source)

1099

def clear(self) -> None:
    """Drop all children by binding `self.children` to a new empty list."""
    self.children = list()

1102

def replace(self, old, new) -> None:
    """Replace one child `Node` with another child or children.

    `old` is located with `self.index()`.  A single `Node` passed as
    `new` is prepared with `setup_child()` and stored at that position;
    any other value except ``None`` is spliced in as a sequence of
    children.  If `new` is ``None``, nothing is changed.
    """
    position = self.index(old)
    if isinstance(new, Node):
        self.setup_child(new)
        self[position] = new
        return
    if new is None:
        return
    self[position:position+1] = new

1111

def replace_self(self, new) -> None:
    """
    Replace `self` node with `new`, where `new` is a node or a
    list of nodes.

    The basic attributes of `self` are passed on to `new` (or to the
    first item of `new` if it is a list) when that is an `Element`.

    Provisional: the handling of node attributes will be revised.
    """
    if isinstance(new, Node):
        heir = new
    else:
        # `new` is a list; its first child (if any) gets the attributes.
        try:
            heir = new[0]
        except IndexError:
            heir = None
    if isinstance(heir, Element):
        heir.update_basic_atts(self)
    else:
        # `heir` is a Text node or `new` is an empty list.
        # Assert that we aren't losing any attributes.
        for att in self.basic_attributes:
            assert not self[att], \
                'Losing "%s" attribute: %s' % (att, self[att])
    self.parent.replace(self, new)

1135

def first_child_matching_class(self,
                               childclass: type[Element] | type[Text]
                               | tuple[type[Element] | type[Text], ...],
                               start: int = 0,
                               end: int = sys.maxsize,
                               ) -> int | None:
    """
    Return the index of the first child that is an instance of `childclass`.

    The test is done with `isinstance()`, so instances of subclasses
    match, too.  Return ``None`` if no child in the range matches.

    Parameters:

    - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
      classes. If a tuple, any of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    wanted = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    for position in range(start, stop):
        if isinstance(self[position], wanted):
            return position
    return None

1159

def first_child_not_matching_class(
        self,
        childclass: type[Element] | type[Text]
        | tuple[type[Element] | type[Text], ...],
        start: int = 0,
        end: int = sys.maxsize,
        ) -> int | None:
    """
    Return the index of the first child that is *not* a `childclass` instance.

    The test is done with `isinstance()` on the items of `self.children`.
    Return ``None`` if every child in the range matches.

    Parameters:

    - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
      classes. If a tuple, none of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    skipped = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    for position in range(start, stop):
        if not isinstance(self.children[position], skipped):
            return position
    return None

1186

def pformat(self, indent: str = ' ', level: int = 0) -> str:
    """Return an indented, multi-line representation of this subtree.

    The first line is `self.starttag()` prefixed with `level` copies of
    `indent`; it is followed by the `pformat()` output of every child,
    each formatted one level deeper.
    """
    head = '%s%s\n' % (indent*level, self.starttag())
    body = ''.join(kid.pformat(indent, level+1) for kid in self.children)
    return head + body

1191

def copy(self) -> Self:
    """Return a copy of this node without its children.

    A new instance of the same class is created from `self.rawsource`
    and the items of `self.attributes`; `_document`, `source` and
    `line` are then carried over.
    """
    twin = self.__class__(rawsource=self.rawsource, **self.attributes)
    for slot in ('_document', 'source', 'line'):
        setattr(twin, slot, getattr(self, slot))
    return twin

1198

def deepcopy(self) -> Self:
    """Return a copy of this node including deep copies of all children.

    The node itself is duplicated with `copy()`; the result is then
    extended with ``child.deepcopy()`` of every item in `self.children`.
    """
    clone = self.copy()
    cloned_children = []
    for kid in self.children:
        cloned_children.append(kid.deepcopy())
    clone.extend(cloned_children)
    return clone

1203

def note_referenced_by(self,
                       name: str | None = None,
                       id: str | None = None,
                       ) -> None:
    """Note that this Element has been referenced by its name
    `name` or id `id`."""
    self.referenced = True
    # The optional ``expect_referenced_by_name`` / ``expect_referenced_by_id``
    # dictionaries map names or ids to nodes whose ``referenced``
    # attribute is set to true as soon as this node is referenced by
    # the given name or id.
    # Needed for target propagation.
    by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)
    by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)
    for key, follower in ((name, by_name), (id, by_id)):
        if follower:
            assert key is not None
            follower.referenced = True

1223

@classmethod
def is_not_list_attribute(cls, attr: str) -> bool:
    """
    Return True if and only if `attr` is NOT listed in
    `cls.list_attributes`.
    """
    listed = attr in cls.list_attributes
    return not listed

1231

@classmethod
def is_not_known_attribute(cls, attr: str) -> bool:
    """
    Return True if and only if `attr` is NOT listed in
    `cls.common_attributes`.

    Provisional. May be removed in Docutils 2.0.
    """
    known = attr in cls.common_attributes
    return not known

1240

def validate_attributes(self) -> None:
    """Normalize and validate element attributes.

    Every attribute value is passed through the matching function in
    `ATTRIBUTE_VALIDATORS` and replaced by the result (converting
    strings to the expected datatype and normalizing values).
    Attributes whose name starts with "internal:" are skipped.

    Raise `ValidationError` for invalid attributes or attribute values;
    all problems of the element are reported in one exception.

    Provisional.
    """
    problems = []
    for key, value in self.attributes.items():
        if key.startswith('internal:'):
            # see docs/user/config.html#expose-internals
            continue
        if key in self.valid_attributes:
            try:
                self.attributes[key] = ATTRIBUTE_VALIDATORS[key](value)
            except (ValueError, TypeError, KeyError) as error:
                problems.append(
                    f'Attribute "{key}" has invalid value "{value}".\n {error}')
        else:
            va = '", "'.join(self.valid_attributes)
            problems.append(f'Attribute "{key}" not one of "{va}".')
    if not problems:
        return
    raise ValidationError(f'Element {self.starttag()} invalid:\n '
                          + '\n '.join(problems),
                          problematic_element=self)

1268

def validate_content(self,
                     model: _ContentModelTuple | None = None,
                     elements: Sequence | None = None,
                     ) -> list:
    """Test compliance of `elements` with `model`.

    :model: content model description, default `self.content_model`,
    :elements: list of doctree elements, default `self.children`.

    The model is an iterable of ``(category, quantifier)`` pairs that is
    walked once, in order, while stepping through `elements`:

    * ``'.'`` -- exactly one element of `category` is required,
    * ``'?'`` -- at most one element of `category` is consumed,
    * ``'+'`` -- one or more elements of `category` are required,
    * ``'*'`` -- zero or more elements of `category` are consumed.

    Return list of children that do not fit in the model or raise
    `ValidationError` if the content does not comply with the `model`.

    Provisional.
    """
    if model is None:
        model = self.content_model
    if elements is None:
        elements = self.children
    # One shared iterator: `child` is always the next unconsumed element
    # (or None once the elements are exhausted).
    ichildren = iter(elements)
    child = next(ichildren, None)
    for category, quantifier in model:
        if not isinstance(child, category):
            if quantifier in ('.', '+'):
                # a required child is missing or of the wrong category
                raise ValidationError(self._report_child(child, category),
                                      problematic_element=child)
            else:  # quantifier in ('?', '*') -> optional child
                continue  # try same child with next part of content model
        else:
            # Check additional placement constraints (if applicable):
            child.validate_position()
        # advance:
        if quantifier in ('.', '?'):  # go to next element
            child = next(ichildren, None)
        else:  # if quantifier in ('*', '+'):  # pass all matching elements
            for child in ichildren:
                if not isinstance(child, category):
                    # first non-matching element: keep it in `child`
                    # for the next part of the content model
                    break
                try:
                    child.validate_position()
                except AttributeError:
                    pass
            else:
                # iterator exhausted: nothing is left over
                child = None
    # `child` (if any) and everything after it did not fit the model.
    return [] if child is None else [child, *ichildren]

1313

def _report_child(self,
                  child,
                  category: Element | Iterable[Element],
                  ) -> str:
    # Return a str reporting a missing child or child of wrong category.
    #
    # `category` is either a single class or an iterable of classes;
    # `child` is None (missing child), a `Text` node, or any other node.
    try:
        expected = category.__name__
    except AttributeError:
        # several alternatives: join their names
        expected = '> or <'.join(c.__name__ for c in category)
    msg = f'Element {self.starttag()} invalid:\n'
    if child is None:
        report = f'{msg} Missing child of type <{expected}>.'
    elif isinstance(child, Text):
        report = (f'{msg} Expecting child of type <{expected}>, '
                  f'not text data "{child.astext()}".')
    else:
        report = (f'{msg} Expecting child of type <{expected}>, '
                  f'not {child.starttag()}.')
    return report

1331

def validate(self, recursive: bool = True) -> None:
    """Validate Docutils Document Tree element ("doctree").

    The attributes are checked first (`validate_attributes()`), then
    the content (`validate_content()`).  Any child left over by the
    content check is reported.

    Raise ValidationError if there are violations.
    If `recursive` is True, validate also the element's descendants.

    See `The Docutils Document Tree`__ for details of the
    Docutils Document Model.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html

    Provisional (work in progress).
    """
    self.validate_attributes()

    for leftover in self.validate_content():
        # The first left-over child already makes the element invalid.
        if isinstance(leftover, Text):
            raise ValidationError(f'Element {self.starttag()} invalid:\n'
                                  f' Spurious text: "{leftover.astext()}".',
                                  problematic_element=self)
        raise ValidationError(f'Element {self.starttag()} invalid:\n'
                              f' Child element {leftover.starttag()} '
                              'not allowed at this position.',
                              problematic_element=leftover)

    if not recursive:
        return
    for child in self:
        child.validate(recursive=recursive)

1362

1363

1364# ====================

1365# Element Categories

1366# ====================

1367#

1368# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-hierarchy.

1369

class Root:
    """Category class: the element at the root of a document tree."""

1372

1373

class Structural:
    """Category class for `structural elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-elements
    """

1380

1381

class SubStructural:
    """Category class for `structural subelements`__.

    Structural subelements are children of `Structural` elements.
    Most Structural elements accept only specific `SubStructural` elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-subelements
    """

1390

1391

class Bibliographic:
    """Category class for `bibliographic elements`__
    (displayed document meta-data).

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #bibliographic-elements
    """

1398

1399

class Body:
    """Category class for `body elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-elements
    """

1405

1406

class Admonition(Body):
    """Admonitions (distinctive and self-contained notices).

    Content: one or more `Body` elements.
    """
    content_model: Final = (
        (Body, '+'),
    )
    # (%body.elements;)+

1410

1411

class Sequential(Body):
    """Category class for list-like body elements."""

1414

1415

class General(Body):
    """Category class for miscellaneous body elements."""

1418

1419

class Special(Body):
    """Category class for special internal body elements."""

1422

1423

class Part:
    """Category class for `body subelements`__.

    Body subelements always occur within specific parent elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-subelements
    """

1429

1430

class Decorative:
    """Decorative elements (`header` and `footer`).

    Children of `decoration`.
    Content: one or more `Body` elements.
    """
    content_model: Final = (
        (Body, '+'),
    )
    # (%body.elements;)+

1437

1438

class Inline:
    """Category class for inline elements.

    Inline elements contain text data and possibly other inline elements.
    """

1442

1443

1444# Orthogonal categories and Mixins

1445# ================================

1446

class PreBibliographic:
    """Category class: elements which may occur before
    `Bibliographic` elements."""

1449

1450

class Invisible(Special, PreBibliographic):
    """Category class: internal elements that don't appear in output."""

1453

1454

class Labeled:
    """Category class: element containing a `label` as its first element."""

1457

1458

class Resolvable:
    """Mixin providing a `resolved` flag; the class default is False."""

    resolved: bool = False

1461

1462

class BackLinkable:
    """Mixin for Elements that accept a "backrefs" attribute.

    Extends the list attributes and the valid attributes of `Element`
    with 'backrefs'.
    """

    list_attributes: Final = (*Element.list_attributes, 'backrefs')
    valid_attributes: Final = (*Element.valid_attributes, 'backrefs')

    def add_backref(self: Element, refid: str) -> None:
        """Append `refid` to the "backrefs" attribute of this element."""
        backrefs = self['backrefs']
        backrefs.append(refid)

1471

1472

class Referential(Resolvable):
    """Category class: elements holding a cross-reference
    (outgoing hyperlink)."""

1475

1476

class Targetable(Resolvable):
    """Category class: cross-reference targets (incoming hyperlink)."""

    # Class default; `Element.note_referenced_by()` sets the instance
    # attribute to True.
    referenced: int = 0

    indirect_reference_name: str | None = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.

    This was required for MoinMoin <= 1.9 compatibility.

    Deprecated, will be removed in Docutils 1.0.
    """

1488

1489

class Titular:
    """Category class: title, sub-title, or informal heading (rubric)."""

1492

1493

class TextElement(Element):
    """
    An element which directly contains text.

    Its children are all `Text` or `Inline` subclass nodes.  You can
    check whether an element's context is inline simply by checking whether
    its immediate parent is a `TextElement` instance (including subclasses).
    This is handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """
    content_model: Final = (
        ((Text, Inline), '*'),
    )
    # (#PCDATA | %inline.elements;)*

    child_text_separator: Final = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize the element; non-empty `text` becomes the first child.

        A non-empty `text` is wrapped in a `Text` node and placed before
        the other `children`.
        """
        if text:
            children = (Text(text), *children)
        Element.__init__(self, rawsource, *children, **attributes)

1525

1526

class FixedTextElement(TextElement):
    """An element which directly contains preformatted text.

    Every instance gets the attribute ``xml:space="preserve"``.
    """

    valid_attributes: Final = (*Element.valid_attributes, 'xml:space')

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize as `TextElement`, then force ``xml:space``."""
        super().__init__(rawsource, text, *children, **attributes)
        # Set after the base initialisation, so a value passed in
        # `attributes` is overridden.
        self.attributes.update({'xml:space': 'preserve'})

1540

1541

class PureTextElement(TextElement):
    """An element which only contains text, no children.

    Content: at most one `Text` node.
    """
    content_model: Final = (
        (Text, '?'),
    )
    # (#PCDATA)

1545

1546

1547# =================================

1548# Concrete Document Tree Elements

1549# =================================

1550#

1551# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-reference

1552

1553# Decorative Elements

1554# ===================

1555

class header(Decorative, Element):
    """Decorative element `header`, a child of `decoration`."""

class footer(Decorative, Element):
    """Decorative element `footer`, a child of `decoration`."""

1558

1559

1560# Structural Subelements

1561# ======================

1562

class title(Titular, PreBibliographic, SubStructural, TextElement):
    """Title of `document`, `section`, `topic` and generic `admonition`.

    Accepts the attributes "auto" and "refid" in addition to the
    attributes valid for every `Element`.
    """
    valid_attributes: Final = (*Element.valid_attributes, 'auto', 'refid')

1567

1568

class subtitle(Titular, PreBibliographic, SubStructural, TextElement):
    """Sub-title of `document`, `section` and `sidebar`."""

    def validate_position(self) -> None:
        """Check position of subtitle: must follow a title.

        Raise `ValidationError` if the subtitle is the first child of
        its parent.
        """
        if not self.parent:
            return
        if self.parent.index(self) != 0:
            return
        raise ValidationError(f'Element {self.parent.starttag()} invalid:'
                              '\n <subtitle> only allowed after <title>.',
                              problematic_element=self)

1578

1579

class meta(PreBibliographic, SubStructural, Element):
    """Container for "invisible" bibliographic data, or meta-data."""
    valid_attributes: Final = (
        *Element.valid_attributes,
        'content',
        'dir',
        'http-equiv',
        'lang',
        'media',
        'name',
        'scheme',
    )

1584

1585

class docinfo(SubStructural, Element):
    """Container for displayed document meta-data.

    Content: one or more `Bibliographic` elements.
    """
    content_model: Final = (
        (Bibliographic, '+'),
    )
    # (%bibliographic.elements;)+

1590

1591

class decoration(PreBibliographic, SubStructural, Element):
    """Container for `header` and `footer`."""
    # An empty element doesn't make sense, but is simpler to define.
    content_model: Final = (
        (header, '?'),
        (footer, '?'),
    )
    # (header?, footer?)

    def get_header(self) -> header:
        """Return the `header` child; insert a new one at the front
        if the first child is not a `header`."""
        kids = self.children
        if not (len(kids) and isinstance(kids[0], header)):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self) -> footer:
        """Return the `footer` child; append a new one
        if the last child is not a `footer`."""
        kids = self.children
        if not (len(kids) and isinstance(kids[-1], footer)):
            self.append(footer())
        return self.children[-1]

1608

1609

class transition(SubStructural, Element):
    """Transitions__ are breaks between untitled text parts.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition
    """

    def validate_position(self) -> None:
        """Check additional constraints on `transition` placement.

        A transition may not begin or end a section or document,
        nor may two transitions be immediately adjacent.

        Raise `ValidationError` listing every violated constraint.
        """
        problems = [f'Element {self.parent.starttag()} invalid:']
        before = self.previous_sibling()
        # A transition following these elements still counts as
        # "at the beginning of a document or section".
        leading = (title, subtitle, meta, decoration)
        if before is None or isinstance(before, leading):
            problems.append(
                '<transition> may not begin a section or document.')
        if self.parent.index(self) == len(self.parent) - 1:
            problems.append('<transition> may not end a section or document.')
        if isinstance(before, transition):
            problems.append(
                '<transition> may not directly follow another transition.')
        # The first entry is only the heading.
        if len(problems) <= 1:
            return
        raise ValidationError('\n '.join(problems),
                              problematic_element=self)

1639

1640

1641# Structural Elements

1642# ===================

1643

class topic(Structural, Element):
    """
    Topics__ are non-recursive, mini-sections.

    Content: an optional `title` followed by one or more `Body` elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#topic
    """
    content_model: Final = (
        (title, '?'),
        (Body, '+'),
    )
    # (title?, (%body.elements;)+)

1652

1653

class sidebar(Structural, Element):
    """
    Sidebars__ are like parallel documents providing related material.

    A sidebar is typically offset by a border and "floats" to the side
    of the page.

    Content: an optional `title`, an optional `subtitle`, and one or
    more `topic` or `Body` elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#sidebar
    """
    content_model: Final = (
        (title, '?'),
        (subtitle, '?'),
        ((topic, Body), '+'),
    )
    # ((title, subtitle?)?, (%body.elements; | topic)+)
    # "subtitle only after title" is ensured in `subtitle.validate_position()`.

1669

1670

class section(Structural, Element):
    """Document section__. The main unit of hierarchy.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#section
    """
    # The content model is recursive (sections may contain sections),
    # so it is assigned after the class definition.


section.content_model = (
    (title, '.'),
    (subtitle, '?'),
    ((Body, topic, sidebar, transition), '*'),
    ((section, transition), '*'),
)
# (title, subtitle?, %structure.model;)
# Correct transition placement is ensured in `transition.validate_position()`.

1686

1687

1688# Root Element

1689# ============

1690

1691class document(Root, Element):

1692 """

1693 The document root element.

1694

1695 Do not instantiate this class directly; use

1696 `docutils.utils.new_document()` instead.

1697 """

1698 valid_attributes: Final = Element.valid_attributes + ('title',)

1699 content_model: Final = ((title, '?'),

1700 (subtitle, '?'),

1701 (meta, '*'),

1702 (decoration, '?'),

1703 (docinfo, '?'),

1704 (transition, '?'),

1705 ((Body, topic, sidebar, transition), '*'),

1706 ((section, transition), '*'),

1707 )

1708 # ( (title, subtitle?)?,

1709 # meta*,

1710 # decoration?,

1711 # (docinfo, transition?)?,

1712 # %structure.model; )

1713 # Additional restrictions for `subtitle` and `transition` are tested

1714 # with the respective `validate_position()` methods.

1715

def __init__(self,
             settings: Values,
             reporter: Reporter,
             *args,
             **kwargs: Any,
             ) -> None:
    """Set up the document root and its bookkeeping attributes.

    `settings` and `reporter` are stored as `self.settings` and
    `self.reporter`; `args` and `kwargs` are passed on to
    `Element.__init__()`.
    """
    Element.__init__(self, *args, **kwargs)

    self.current_source: StrPath | None = None
    """Path to or description of the input source being processed."""

    self.current_line: int | None = None
    """Line number (1-based) of `current_source`."""

    self.settings: Values = settings
    """Runtime settings data record."""

    self.reporter: Reporter = reporter
    """System message generator."""

    self.indirect_targets: list[target] = []
    """List of indirect target nodes."""

    self.substitution_defs: dict[str, substitution_definition] = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names: dict[str, str] = {}
    """Mapping of case-normalized to case-sensitive substitution names."""

    self.refnames: dict[str, list[Element]] = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids: dict[str, list[Element]] = {}
    """(Incomplete) Mapping of ids to lists of referencing nodes."""

    self.nameids: dict[str, str] = {}
    """Mapping of names to unique id's."""

    self.nametypes: dict[str, bool] = {}
    """Mapping of names to hyperlink type. True: explicit, False: implicit.
    """

    self.ids: dict[str, Element] = {}
    """Mapping of ids to nodes."""

    self.footnote_refs: dict[str, list[footnote_reference]] = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs: dict[str, list[citation_reference]] = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes: list[footnote] = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs: list[footnote_reference] = []
    """List of auto-numbered footnote_reference nodes."""

    self.symbol_footnotes: list[footnote] = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs: list[footnote_reference] = []
    """List of symbol footnote_reference nodes."""

    self.footnotes: list[footnote] = []
    """List of manually-numbered footnote nodes."""

    self.citations: list[citation] = []
    """List of citation nodes."""

    self.autofootnote_start: int = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnote_start: int = 0
    """Initial symbol footnote symbol index."""

    # Keys are the ID prefixes (strings) used by `set_id()`.
    self.id_counter: Counter[str] = Counter()
    """Numbers added to otherwise identical IDs."""

    self.parse_messages: list[system_message] = []
    """System messages generated while parsing."""

    self.transform_messages: list[system_message] = []
    """System messages generated while applying transforms."""

    # Delayed import (not done at module level).
    import docutils.transforms
    self.transformer: Transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.include_log: list[tuple[StrPath, tuple]] = []
    """The current source's parents (to detect inclusion loops)."""

    self.decoration: decoration | None = None
    """Document's `decoration` node."""

    # The document is its own root.
    self._document: document = self

1811

def __getstate__(self) -> dict[str, Any]:
    """
    Return dict with unpicklable references removed.

    The result is a shallow copy of the instance dictionary in which
    'reporter' and 'transformer' are set to None.
    """
    return {**self.__dict__, 'reporter': None, 'transformer': None}

1820

def asdom(self, dom: ModuleType | None = None) -> minidom.Document:
    """Return a DOM representation of this document.

    `dom` is the DOM implementation module to use; it defaults to
    `xml.dom.minidom` (imported on demand).
    """
    if dom is None:
        from xml.dom import minidom as dom
    dom_document = dom.Document()
    root_node = self._dom_node(dom_document)
    dom_document.appendChild(root_node)
    return dom_document

1828

def set_id(self,
           node: Element,
           msgnode: Element | None = None,
           suggested_prefix: str = '',
           ) -> str:
    """Register the IDs of `node`, or generate one; return an ID.

    If `node` already has "ids", each one is registered in `self.ids`.
    An ID that is already registered for a different node is reported
    with `self.reporter.error()`; the message is added to `msgnode`
    (if given).  The last of the node's IDs is returned.

    Otherwise an ID is derived from the node's "names" (with the
    "id_prefix" setting prepended).  If no name yields an unused ID,
    a prefix followed by a running number is used instead; the prefix
    is built from the "id_prefix" and "auto_id_prefix" settings,
    `suggested_prefix`, or the node's tag name.  The new ID is appended
    to ``node['ids']``, registered in `self.ids` and returned.
    """
    if node['ids']:
        # register and check for duplicates
        for id in node['ids']:
            self.ids.setdefault(id, node)
            if self.ids[id] is not node:
                msg = self.reporter.error(f'Duplicate ID: "{id}" used by '
                                          f'{self.ids[id].starttag()} '
                                          f'and {node.starttag()}',
                                          base_node=node)
                if msgnode is not None:
                    msgnode += msg
        return id
    # generate and set id
    id_prefix = self.settings.id_prefix
    auto_id_prefix = self.settings.auto_id_prefix
    base_id = ''
    id = ''
    for name in node['names']:
        if id_prefix:  # allow names starting with numbers
            base_id = make_id('x'+name)[1:]
        else:
            base_id = make_id(name)
        # TODO: normalize id-prefix? (would make code simpler)
        id = id_prefix + base_id
        if base_id and id not in self.ids:
            # found an unused name-derived ID
            break
    else:
        # No usable name-derived ID: fall back to a numbered ID.
        if base_id and auto_id_prefix.endswith('%'):
            # disambiguate name-derived ID
            # TODO: remove second condition after announcing change
            prefix = id + '-'
        else:
            prefix = id_prefix + auto_id_prefix
            if prefix.endswith('%'):
                # replace the trailing "%" placeholder
                prefix = f"""{prefix[:-1]}{suggested_prefix
                                            or make_id(node.tagname)}-"""
        # Count up until the ID is not yet in use.
        while True:
            self.id_counter[prefix] += 1
            id = f'{prefix}{self.id_counter[prefix]}'
            if id not in self.ids:
                break
    node['ids'].append(id)
    self.ids[id] = node
    return id

1878

def set_name_id_map(self,
                    node: Element,
                    id: str,
                    msgnode: Element | None = None,
                    explicit: bool = False,
                    ) -> None:
    """
    Update the name/id mappings.

    `self.nameids` maps names to IDs. The value ``None`` indicates
    that the name is a "dupname" (i.e. there are already at least
    two targets with the same name and type).

    `self.nametypes` maps names to booleans representing
    hyperlink target type (True==explicit, False==implicit).

    The following state transition table shows how `self.nameids` items
    ("id") and `self.nametypes` items ("type") change with new input
    (a call to this method), and what actions are performed:

    ========  ====  ========  ====  ========  ========  =======  ======
    Input     Old State       New State       Action             Notes
    --------  --------------  --------------  -----------------  ------
    type      id    type      id    type      dupname   report
    ========  ====  ========  ====  ========  ========  =======  ======
    explicit                  new   explicit
    implicit                  new   implicit
    explicit  old   explicit  None  explicit  new,old   WARNING  [#ex]_
    implicit  old   explicit  old   explicit  new       INFO     [#ex]_
    explicit  old   implicit  new   explicit  old       INFO     [#ex]_
    implicit  old   implicit  None  implicit  new,old   INFO     [#ex]_
    explicit  None  explicit  None  explicit  new       WARNING
    implicit  None  explicit  None  explicit  new       INFO
    explicit  None  implicit  new   explicit
    implicit  None  implicit  None  implicit  new       INFO
    ========  ====  ========  ====  ========  ========  =======  ======

    .. [#ex] Do not clear the name-to-id map or invalidate the old target
       if both old and new targets refer to identical URIs or reference
       names.  The new target is invalidated regardless.

    Provisional. There will be changes to prefer explicit reference names
    as base for an element's ID.
    """
    # Iterate over a snapshot: handling a duplicate modifies node['names'].
    for name in tuple(node['names']):
        if name not in self.nameids:
            # first target with this name
            self.nameids[name] = id
            self.nametypes[name] = explicit
            continue
        self.set_duplicate_name_id(node, id, name, msgnode, explicit)

1930

def set_duplicate_name_id(self,
                          node: Element,
                          id: str,
                          name: str,
                          msgnode: Element | None,
                          explicit: bool,
                          ) -> None:
    """
    Handle a `name` of `node` that is already a key in `self.nameids`.

    Update `self.nameids` and `self.nametypes` for `name`, mark the
    new and/or old target with `dupname()`, and (if a message level was
    set) create a system message with `self.reporter.system_message()`.

    The message is appended to `msgnode` (if not None) and detached
    again if `msgnode` no longer validates with it.
    """
    old_id = self.nameids[name]  # None if name is only dupname
    old_explicit = self.nametypes[name]
    old_node = self.ids.get(old_id)
    level = 0  # system message level: 1-info, 2-warning

    # the name counts as explicit as soon as one target is explicit
    self.nametypes[name] = old_explicit or explicit

    if old_id is not None and (
            'refname' in node and node['refname'] == old_node.get('refname')
            or 'refuri' in node and node['refuri'] == old_node.get('refuri')
            ):
        # indirect targets with same reference -> keep old target
        level = 1
        ref = node.get('refuri') or node.get('refname')
        s = f'Duplicate name "{name}" for external target "{ref}".'
        dupname(node, name)
    elif explicit:
        if old_explicit:
            level = 2
            s = f'Duplicate explicit target name: "{name}".'
            dupname(node, name)
            if old_id is not None:
                dupname(old_node, name)
                self.nameids[name] = None
        else:  # new explicit, old implicit -> override
            self.nameids[name] = id
            if old_id is not None:
                level = 1
                s = f'Target name overrides implicit target name "{name}".'
                dupname(old_node, name)
    else:  # new name is implicit
        level = 1
        s = f'Duplicate implicit target name: "{name}".'
        dupname(node, name)
        if old_id is not None and not old_explicit:
            dupname(old_node, name)
            self.nameids[name] = None

    # `s` is only defined in the branches that also set `level`
    if level:
        backrefs = [id]
        # don't add backref id for empty targets (not shown in output)
        if isinstance(node, target) and 'refuri' in node:
            backrefs = []
        msg = self.reporter.system_message(level, s,
                                           backrefs=backrefs,
                                           base_node=node)
        # try appending near to the problem:
        if msgnode is not None:
            msgnode += msg
            try:
                msgnode.validate(recursive=False)
            except ValidationError:
                # detach -> will be handled by `Messages` transform
                msgnode.pop()
                msg.parent = None

1993

def has_name(self, name: str) -> bool:
    """Return True if `name` is a key of `self.nameids`."""
    return name in self.nameids

1996

1997 # "note" here is an imperative verb: "take note of".

def note_implicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Give `target` an ID and register its names as implicit."""
    # TODO: Postpone ID creation and register reference name instead of ID?
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=False)

2003

def note_explicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Give `target` an ID and register its names as explicit."""
    # TODO: if the id matching the name is applied to an implicit target,
    # transfer it to this target and put a "disambiguated" id on the other.
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=True)

2010

def note_refname(self, node: Element) -> None:
    """Append `node` to the `self.refnames` list for its "refname"."""
    referencing_nodes = self.refnames.setdefault(node['refname'], [])
    referencing_nodes.append(node)

2013

def note_refid(self, node: Element) -> None:
    """Append `node` to the `self.refids` list for its "refid"."""
    referencing_nodes = self.refids.setdefault(node['refid'], [])
    referencing_nodes.append(node)

2016

def note_indirect_target(self, target: target) -> None:
    """
    Record `target` in `self.indirect_targets`.

    A target with names is also passed to `self.note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)

2021

def note_anonymous_target(self, target: target) -> None:
    """Make sure `target` has an ID (see `set_id()`)."""
    self.set_id(target)

2024

def note_autofootnote(self, footnote: footnote) -> None:
    """Give `footnote` an ID and append it to `self.autofootnotes`."""
    self.set_id(footnote)
    registry = self.autofootnotes
    registry.append(footnote)

2028

def note_autofootnote_ref(self, ref: footnote_reference) -> None:
    """Give `ref` an ID and append it to `self.autofootnote_refs`."""
    self.set_id(ref)
    registry = self.autofootnote_refs
    registry.append(ref)

2032

def note_symbol_footnote(self, footnote: footnote) -> None:
    """Give `footnote` an ID and append it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    registry = self.symbol_footnotes
    registry.append(footnote)

2036

def note_symbol_footnote_ref(self, ref: footnote_reference) -> None:
    """Give `ref` an ID and append it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    registry = self.symbol_footnote_refs
    registry.append(ref)

2040

def note_footnote(self, footnote: footnote) -> None:
    """Give `footnote` an ID and append it to `self.footnotes`."""
    self.set_id(footnote)
    registry = self.footnotes
    registry.append(footnote)

2044

def note_footnote_ref(self, ref: footnote_reference) -> None:
    """
    Give `ref` an ID and register it under its "refname".

    The reference is stored in `self.footnote_refs` and also passed
    to `self.note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.footnote_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)

2049

def note_citation(self, citation: citation) -> None:
    """Append `citation` to `self.citations` (no ID is set here)."""
    self.citations.append(citation)

2052

def note_citation_ref(self, ref: citation_reference) -> None:
    """
    Give `ref` an ID and register it under its "refname".

    The reference is stored in `self.citation_refs` and also passed
    to `self.note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.citation_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)

2057

def note_substitution_def(self,
                          subdef: substitution_definition,
                          def_name: str,
                          msgnode: Element | None = None,
                          ) -> None:
    """
    Register the substitution definition `subdef` under `def_name`.

    The name is whitespace-normalized.  If a definition with this name
    is already registered, an error is reported (and appended to
    `msgnode`, if given), the old definition is marked with `dupname()`,
    and the new definition replaces it.
    """
    name = whitespace_normalize_name(def_name)
    registry = self.substitution_defs
    if name in registry:
        msg = self.reporter.error(
              f'Duplicate substitution definition name: "{name}".',
              base_node=subdef)
        if msgnode is not None:
            msgnode += msg
        dupname(registry[name], name)
    # keep only the last definition:
    registry[name] = subdef
    # case-insensitive mapping:
    self.substitution_names[fully_normalize_name(name)] = name

2076

def note_substitution_ref(self,
                          subref: substitution_reference,
                          refname: str,
                          ) -> None:
    """Store the whitespace-normalized `refname` in ``subref['refname']``."""
    normalized = whitespace_normalize_name(refname)
    subref['refname'] = normalized

2082

def note_pending(
        self, pending: pending, priority: int | None = None) -> None:
    """Pass `pending` and `priority` on to `self.transformer.add_pending()`."""
    self.transformer.add_pending(pending, priority)

2086

def note_parse_message(self, message: system_message) -> None:
    """Append `message` to `self.parse_messages`."""
    self.parse_messages.append(message)

2089

def note_transform_message(self, message: system_message) -> None:
    """Append `message` to `self.transform_messages`."""
    self.transform_messages.append(message)

2092

def note_source(self,
                source: StrPath | None,
                offset: int | None,
                ) -> None:
    """
    Update `self.current_source` and `self.current_line`.

    A true `source` is stored as the result of `os.fspath()`, a false
    one (``None``, empty string) is stored unchanged.
    `offset` is stored incremented by one; ``None`` is kept as ``None``.
    """
    self.current_source = os.fspath(source) if source else source
    self.current_line = None if offset is None else offset + 1

2102

def copy(self) -> Self:
    """
    Return a new instance of the same class without children.

    The new object is created from `self.settings`, `self.reporter`,
    and the attributes of `self`; "source" and "line" are copied over.
    """
    clone = self.__class__(self.settings, self.reporter, **self.attributes)
    clone.source, clone.line = self.source, self.line
    return clone

2109

def get_decoration(self) -> decoration:
    """
    Return the document's `decoration` node, creating it if required.

    A new `decoration` node is inserted at the index returned by
    ``self.first_child_not_matching_class((Titular, meta))`` or
    appended if that index is ``None``.
    """
    if self.decoration:
        return self.decoration
    new_decoration = decoration()
    self.decoration = new_decoration
    index = self.first_child_not_matching_class((Titular, meta))
    if index is None:
        self.append(new_decoration)
    else:
        self.insert(index, new_decoration)
    return self.decoration

2119

2120

2121# Bibliographic Elements

2122# ======================

2123

# Simple bibliographic elements: each class only combines the
# `Bibliographic` category with a text element base class and defines
# no attributes, content model, or methods of its own.
class author(Bibliographic, TextElement): pass
class organization(Bibliographic, TextElement): pass
class address(Bibliographic, FixedTextElement): pass
class contact(Bibliographic, TextElement): pass
class version(Bibliographic, TextElement): pass
class revision(Bibliographic, TextElement): pass
class status(Bibliographic, TextElement): pass
class date(Bibliographic, TextElement): pass
class copyright(Bibliographic, TextElement): pass  # NoQA: A001 (builtin name)

2133

2134

class authors(Bibliographic, Element):
    """Container for author information for documents with multiple authors.
    """
    content_model: Final = ((author, '+'),
                            (organization, '?'),
                            (address, '?'),
                            (contact, '?'),
                            )
    # (author, organization?, address?, contact?)+

    def validate_content(self,
                         model: _ContentModelTuple | None = None,
                         elements: Sequence | None = None,
                         ) -> list:
        """Repeatedly test for children matching the content model.

        `model` and `elements` are forwarded to the base class
        implementation (``None`` selects its defaults).  Elements left
        over by one pass are matched against the model again until none
        remain (or the base class implementation raises).

        Return the remaining elements (an empty sequence).

        Provisional.
        """
        # Forward the arguments instead of silently dropping them.
        relics = super().validate_content(model, elements)
        while relics:
            relics = super().validate_content(model, elements=relics)
        return relics

2157

2158

2159# Body Elements

2160# =============

2161#

2162# General

2163# -------

2164#

2165# Miscellaneous Body Elements and related Body Subelements (Part)

2166

# General text elements without attributes or methods of their own:
class paragraph(General, TextElement): pass
class rubric(Titular, General, TextElement): pass


class compound(General, Element):
    """General element whose content is one or more `Body` elements."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class container(General, Element):
    """General element whose content is one or more `Body` elements."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class attribution(Part, TextElement):
    """Visible reference to the source of a `block_quote`."""


class block_quote(General, Element):
    """An extended quotation, set off from the main text."""
    content_model: Final = ((Body, '+'), (attribution, '?'))
    # ((%body.elements;)+, attribution?)


class reference(General, Inline, Referential, TextElement):
    # Extends the attributes valid for every `Element`:
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'name', 'refid', 'refname', 'refuri')

2192

2193

2194# Lists

2195# -----

2196#

2197# Lists (Sequential) and related Body Subelements (Part)

2198

class list_item(Part, Element):
    """List entry; may be empty or hold any number of `Body` elements."""
    content_model: Final = ((Body, '*'),)  # (%body.elements;)*


class bullet_list(Sequential, Element):
    """List of one or more `list_item` elements; accepts "bullet"."""
    valid_attributes: Final = Element.valid_attributes + ('bullet',)
    content_model: Final = ((list_item, '+'),)  # (list_item+)


class enumerated_list(Sequential, Element):
    """List of one or more `list_item` elements with enumeration attributes.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'enumtype', 'prefix', 'suffix', 'start')
    content_model: Final = ((list_item, '+'),)  # (list_item+)

2212

2213

# Text parts of a `definition_list_item` (see its content model below):
class term(Part, TextElement): pass
class classifier(Part, TextElement): pass


class definition(Part, Element):
    """Definition of a `term` in a `definition_list`."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class definition_list_item(Part, Element):
    """Entry of a `definition_list`: term(s), classifiers, one definition."""
    # a leading `term`, any mix of `classifier` and further `term`
    # elements, and a closing `definition`:
    content_model: Final = ((term, '.'),
                            ((classifier, term), '*'),
                            (definition, '.'),
                            )
    # ((term, classifier*)+, definition)


class definition_list(Sequential, Element):
    """List of terms and their definitions.

    Can be used for glossaries or dictionaries, to describe or
    classify things, for dialogues, or to itemize subtopics.
    """
    content_model: Final = ((definition_list_item, '+'),)
    # (definition_list_item+)

2239

2240

# Text part of a `field` (see the content model of `field` below):
class field_name(Part, TextElement): pass


class field_body(Part, Element):
    """Second child of a `field`; may be empty or hold `Body` elements."""
    content_model: Final = ((Body, '*'),)  # (%body.elements;)*


class field(Part, Bibliographic, Element):
    """Pair of exactly one `field_name` and one `field_body`."""
    content_model: Final = ((field_name, '.'), (field_body, '.'))
    # (field_name, field_body)


class field_list(Sequential, Element):
    """List of label & data pairs.

    Typically rendered as a two-column list.
    Also used for extension syntax or special processing.
    """
    content_model: Final = ((field, '+'),)  # (field+)

2260

2261

class option_string(Part, PureTextElement):
    """A literal command-line option. Typically monospaced."""


class option_argument(Part, PureTextElement):
    """Placeholder text for option arguments."""
    valid_attributes: Final = Element.valid_attributes + ('delimiter',)

    def astext(self) -> str:
        """Return the text content, preceded by the "delimiter" attribute.

        A missing "delimiter" attribute defaults to a space character.
        """
        return self.get('delimiter', ' ') + TextElement.astext(self)

2272

2273

class option(Part, Element):
    """Option element in an `option_list_item`.

    Groups an option string with zero or more option argument placeholders.
    """
    child_text_separator: Final = ''
    content_model: Final = ((option_string, '.'), (option_argument, '*'))
    # (option_string, option_argument*)


class option_group(Part, Element):
    """Groups together one or more `option` elements, all synonyms."""
    child_text_separator: Final = ', '
    content_model: Final = ((option, '+'),)  # (option+)


class description(Part, Element):
    """Description of a command-line option."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class option_list_item(Part, Element):
    """Container for a pair of `option_group` and `description` elements.
    """
    # NOTE(review): the width of this separator could not be confirmed
    # from the reviewed text (runs of whitespace were collapsed) -- verify.
    child_text_separator: Final = ' '
    content_model: Final = ((option_group, '.'), (description, '.'))
    # (option_group, description)


class option_list(Sequential, Element):
    """Two-column list of command-line options and descriptions."""
    content_model: Final = ((option_list_item, '+'),)  # (option_list_item+)

2306

2307

2308# Pre-formatted text blocks

2309# -------------------------

2310

# General elements based on `FixedTextElement`:
class literal_block(General, FixedTextElement): pass
class doctest_block(General, FixedTextElement): pass


class math_block(General, FixedTextElement, PureTextElement):
    """Mathematical notation (display formula)."""


class line(Part, TextElement):
    """Single line of text in a `line_block`."""
    # class-level default; None means no indentation value is stored
    indent: str | None = None


class line_block(General, Element):
    """Sequence of lines and nested line blocks.
    """
    # recursive content model: (line | line_block)+


# The content model refers to `line_block` itself and can therefore
# only be assigned after the class statement has completed.
line_block.content_model = (((line, line_block), '+'),)

2331

2332

2333# Admonitions

2334# -----------

2335# distinctive and self-contained notices

2336

# Specific admonitions: plain `Admonition` elements that define
# no content model or attributes of their own.
class attention(Admonition, Element): pass
class caution(Admonition, Element): pass
class danger(Admonition, Element): pass
class error(Admonition, Element): pass
class important(Admonition, Element): pass
class note(Admonition, Element): pass
class tip(Admonition, Element): pass
class hint(Admonition, Element): pass
class warning(Admonition, Element): pass


class admonition(Admonition, Element):
    """Admonition that starts with a `title` element."""
    content_model: Final = ((title, '.'), (Body, '+'))
    # (title, (%body.elements;)+)

2351

2352

2353# Footnote and citation

2354# ---------------------

2355

class label(Part, PureTextElement):
    """Visible identifier for footnotes and citations."""


class footnote(General, BackLinkable, Element, Labeled, Targetable):
    """Labelled note providing additional context (footnote or endnote)."""
    valid_attributes: Final = Element.valid_attributes + ('auto', 'backrefs')
    content_model: Final = ((label, '?'), (Body, '+'))
    # (label?, (%body.elements;)+)
    # The label will become required in Docutils 1.0.


class citation(General, BackLinkable, Element, Labeled, Targetable):
    """Element with a required `label` followed by `Body` elements."""
    content_model: Final = ((label, '.'), (Body, '+'))
    # (label, (%body.elements;)+)

2371

2372

2373# Graphical elements

2374# ------------------

2375

class image(General, Inline, Element):
    """Reference to an image resource.

    May be body element or inline element.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading')

    def astext(self) -> str:
        """Return the "alt" attribute (empty string if it is missing)."""
        return self.get('alt', '')


# Text part of a `figure` (see the content model of `figure` below):
class caption(Part, TextElement): pass


class legend(Part, Element):
    """A wrapper for text accompanying a `figure` that is not the caption."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class figure(General, Element):
    """A formal figure, generally an illustration, with a title."""
    valid_attributes: Final = Element.valid_attributes + ('align', 'width')
    # first child: an `image` or a `reference`;
    # `caption` and `legend` are both optional here (cf. the TODO below)
    content_model: Final = (((image, reference), '.'),
                            (caption, '?'),
                            (legend, '?'),
                            )
    # (image, ((caption, legend?) | legend))
    # TODO: According to the DTD, a caption or legend is required
    # but rST allows "bare" figures which are formatted differently from
    # images (floating in LaTeX, nested in a <figure> in HTML). [bugs: #489]

2407

2408

2409# Tables

2410# ------

2411

class entry(Part, Element):
    """An entry in a `row` (a table cell)."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colsep', 'morecols',
        'morerows', 'namest', 'nameend', 'rowsep', 'valign')
    content_model: Final = ((Body, '*'),)
    # %tbl.entry.mdl -> (%body.elements;)*


class row(Part, Element):
    """Row of table cells."""
    valid_attributes: Final = Element.valid_attributes + ('rowsep', 'valign')
    content_model: Final = ((entry, '+'),)  # (%tbl.row.mdl;) -> entry+


class colspec(Part, Element):
    """Specifications for a column in a `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colnum',
        'colsep', 'colwidth', 'rowsep', 'stub')

    def propwidth(self) -> int|float:
        """Return numerical value of "colwidth__" attribute. Default 1.

        Raise ValueError if "colwidth" is zero, negative, or a *fixed value*.

        Provisional.

        __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
        """
        # Move current implementation of validate_colwidth() here
        # in Docutils 1.0
        # (a missing "colwidth" attribute is passed on as empty string)
        return validate_colwidth(self.get('colwidth', ''))


class thead(Part, Element):
    """Row(s) that form the head of a `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    content_model: Final = ((row, '+'),)  # (row+)


class tbody(Part, Element):
    """Body of a `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    content_model: Final = ((row, '+'),)  # (row+)


class tgroup(Part, Element):
    """A portion of a table. Most tables have just one `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'cols', 'colsep', 'rowsep')
    content_model: Final = ((colspec, '*'), (thead, '?'), (tbody, '.'))
    # (colspec*, thead?, tbody)


class table(General, Element):
    """A data arrangement with rows and columns."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width')
    content_model: Final = ((title, '?'), (tgroup, '+'))
    # (title?, tgroup+)

2473

2474

2475# Special purpose elements

2476# ------------------------

2477# Body elements for internal use or special requests.

2478

class comment(Invisible, FixedTextElement, PureTextElement):
    """Author notes, hidden from the output."""


class substitution_definition(Invisible, TextElement):
    # Extends the attributes valid for every `Element`:
    valid_attributes: Final = Element.valid_attributes + ('ltrim', 'rtrim')


class target(Invisible, Inline, TextElement, Targetable):
    # Extends the attributes valid for every `Element`:
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'refid', 'refname', 'refuri')

2490

2491

class system_message(Special, BackLinkable, PreBibliographic, Element):
    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """
    valid_attributes: Final = BackLinkable.valid_attributes + (
        'level', 'line', 'type')
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

    def __init__(self,
                 message: str | None = None,
                 *children,
                 **attributes: Any,
                 ) -> None:
        """
        Initialize the element.

        A non-empty `message` is wrapped in a `paragraph` that becomes
        the first child.  The keyword argument "rawsource" is removed
        from `attributes` and passed on as raw source.

        Exceptions raised by `Element.__init__()` are propagated after
        the children have been written to `sys.stderr`.
        """
        rawsource = attributes.pop('rawsource', '')
        if message:
            p = paragraph('', message)
            children = (p,) + children
        try:
            Element.__init__(self, rawsource, *children, **attributes)
        except Exception:
            # Debugging aid.  Use stderr: stdout may carry the output
            # document and must not be mixed with diagnostics.
            print('system_message: children=%r' % (children,),
                  file=sys.stderr)
            raise

    def astext(self) -> str:
        """Return "<source>:<line>: (<type>/<level>) <text content>".

        A missing "line" attribute is rendered as empty string;
        "source", "type", and "level" must be set.
        """
        line = self.get('line', '')
        return '%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
                                      self['level'], Element.astext(self))

2522

2523

class pending(Invisible, Element):
    """
    Placeholder for pending operations.

    A "pending" element encapsulates an operation that has to be
    postponed: the operation (transform), the point at which to apply it,
    and any data it requires.  The public document tree only stores the
    location of the pending operation (the "pending" object itself);
    the operation and its data are kept in internal instance attributes
    of the "pending" object.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within
    the document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire
    document has been parsed and possibly transformed to some extent.
    So the directive code leaves a placeholder behind that will trigger
    the second phase of its processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all
    pending transforms.
    """

    def __init__(self,
                 transform: Transform,
                 details: Mapping[str, Any] | None = None,
                 rawsource: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform: Transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        self.details: Mapping[str, Any] = details or {}
        """Detail data (dictionary) required by the pending operation."""

    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """
        Return the element's pretty-printed form plus internal attributes.

        The result of `Element.pformat()` is followed by one line for
        the transform and one entry for each item of `self.details`
        (sorted by key).  `Node` values and lists starting with a `Node`
        are shown in their own pretty-printed form, other values with
        `repr()`.
        """
        # NOTE(review): the exact amount of whitespace inside the string
        # literals below and in the default value of `indent` could not
        # be confirmed from the reviewed text (runs of whitespace were
        # collapsed) -- verify before applying.
        def node_lines(node):
            # pretty-printed lines of a detail node, shifted to the right
            return ['%9s%s' % ('', text)
                    for text in node.pformat().splitlines()]

        transform = self.transform
        internals = ['.. internal attributes:',
                     ' .transform: %s.%s' % (transform.__module__,
                                             transform.__name__),
                     ' .details:']
        for key, value in sorted(self.details.items()):
            if isinstance(value, Node):
                internals.append('%7s%s:' % ('', key))
                internals.extend(node_lines(value))
            elif (value
                  and isinstance(value, list)
                  and isinstance(value[0], Node)):
                internals.append('%7s%s:' % ('', key))
                for item in value:
                    internals.extend(node_lines(item))
            else:
                internals.append('%7s%s: %r' % ('', key, value))
        head = Element.pformat(self, indent, level)
        margin = indent * level
        tail = ''.join(' %s%s\n' % (margin, text) for text in internals)
        return head + tail

    def copy(self) -> Self:
        """
        Return a childless copy sharing `transform` and `details`.

        The internal document reference as well as "source" and "line"
        are carried over to the new object.
        """
        clone = self.__class__(self.transform, self.details, self.rawsource,
                               **self.attributes)
        clone._document = self._document
        clone.source, clone.line = self.source, self.line
        return clone

2600

2601

class raw(Special, Inline, PreBibliographic,
          FixedTextElement, PureTextElement):
    """Raw data that is to be passed untouched to the Writer.

    Can be used as Body element or Inline element.
    """
    # Extends the attributes valid for every `Element`:
    valid_attributes: Final = Element.valid_attributes + (
        'format', 'xml:space')

2610

2611

2612# Inline Elements

2613# ===============

2614

# Simple inline elements: each class only combines the `Inline`
# category with `TextElement` and defines nothing of its own.
class abbreviation(Inline, TextElement): pass
class acronym(Inline, TextElement): pass
class emphasis(Inline, TextElement): pass
class generated(Inline, TextElement): pass
class inline(Inline, TextElement): pass
class literal(Inline, TextElement): pass
class strong(Inline, TextElement): pass
class subscript(Inline, TextElement): pass
class superscript(Inline, TextElement): pass
class title_reference(Inline, TextElement): pass


class footnote_reference(Inline, Referential, PureTextElement):
    # Extends the attributes valid for every `Element`:
    valid_attributes: Final = Element.valid_attributes + (
        'auto', 'refid', 'refname')


class citation_reference(Inline, Referential, PureTextElement):
    # Extends the attributes valid for every `Element`:
    valid_attributes: Final = Element.valid_attributes + ('refid', 'refname')


class substitution_reference(Inline, TextElement):
    # Extends the attributes valid for every `Element`:
    valid_attributes: Final = Element.valid_attributes + ('refname',)


class math(Inline, PureTextElement):
    """Mathematical notation in running text."""


class problematic(Inline, TextElement):
    # Extends the attributes valid for every `Element`:
    valid_attributes: Final = Element.valid_attributes + (
        'refid', 'refname', 'refuri')

2647

2648

2649# ========================================

2650# Auxiliary Classes, Functions, and Data

2651# ========================================

2652

# The names are split on whitespace, so the line breaks and indentation
# inside the string literal only serve readability.
node_class_names: Sequence[str] = """
    Text
    abbreviation acronym address admonition attention attribution author
        authors
    block_quote bullet_list
    caption caution citation citation_reference classifier colspec comment
        compound contact container copyright
    danger date decoration definition definition_list definition_list_item
        description docinfo doctest_block document
    emphasis entry enumerated_list error
    field field_body field_list field_name figure footer
        footnote footnote_reference
    generated
    header hint
    image important inline
    label legend line line_block list_item literal literal_block
    math math_block meta
    note
    option option_argument option_group option_list option_list_item
        option_string organization
    paragraph pending problematic
    raw reference revision row rubric
    section sidebar status strong subscript substitution_definition
        substitution_reference subtitle superscript system_message
    table target tbody term tgroup thead tip title title_reference topic
        transition
    version
    warning""".split()
"""A list of names of all concrete Node subclasses."""

2682

2683

class NodeVisitor:
    """
    Abstract superclass implementing the "Visitor" pattern [GoF95]_ for
    document tree traversals.

    There are methods for each node class.  They do nothing by default;
    override individual methods for specific and useful behaviour.
    `Node.walk()` calls the `dispatch_visit()` method upon entering
    a node.  `Node.walkabout()` also calls the `dispatch_departure()`
    method before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.

    This is a base class for visitors whose ``visit_...`` & ``depart_...``
    methods must be implemented for *all* compulsory node types encountered
    (such as for `docutils.writers.Writer` subclasses).
    Unimplemented methods will raise exceptions (except for optional nodes).

    For sparse traversals, where only certain node types are of interest, use
    subclass `SparseNodeVisitor` instead.  When (mostly or entirely) uniform
    processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional: ClassVar[tuple[str, ...]] = ('meta',)
    """
    Tuple containing node class names (as strings).

    No exception will be raised if writers do not implement visit
    or departure functions for these node classes.

    Used to ensure transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document: document, /) -> None:
        self.document: document = document

    def dispatch_visit(self, node) -> None:
        """
        Call self."``visit_`` + node class name" with `node` as
        parameter.  Fall back to `self.unknown_visit()` if there is
        no such ``visit_...`` method.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, f'visit_{node_name}', self.unknown_visit)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_visit '
            f'calling {handler.__name__} for {node_name}')
        return handler(node)

    def dispatch_departure(self, node) -> None:
        """
        Call self."``depart_`` + node class name" with `node` as
        parameter.  Fall back to `self.unknown_departure()` if there is
        no such ``depart_...`` method.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, f'depart_{node_name}', self.unknown_departure)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_departure '
            f'calling {handler.__name__} for {node_name}')
        return handler(node)

    def unknown_visit(self, node) -> None:
        """
        Called when entering unknown `Node` types.

        Raise `NotImplementedError` unless overridden.  Node classes
        listed in `self.optional` are silently accepted if the
        "strict_visitor" setting is false.
        """
        strict = self.document.settings.strict_visitor
        node_name = node.__class__.__name__
        if not strict and node_name in self.optional:
            return
        raise NotImplementedError(
            f'{self.__class__} visiting unknown node type: {node_name}')

    def unknown_departure(self, node) -> None:
        """
        Called before exiting unknown `Node` types.

        Raise `NotImplementedError` unless overridden.  Node classes
        listed in `self.optional` are silently accepted if the
        "strict_visitor" setting is false.
        """
        strict = self.document.settings.strict_visitor
        node_name = node.__class__.__name__
        if not strict and node_name in self.optional:
            return
        raise NotImplementedError(
            f'{self.__class__} departing unknown node type: {node_name}')

2774

2775

class SparseNodeVisitor(NodeVisitor):
    """
    Base class for sparse traversals, where only certain node types are of
    interest.  When ``visit_...`` & ``depart_...`` methods should be
    implemented for *all* node types (such as for `docutils.writers.Writer`
    subclasses), subclass `NodeVisitor` instead.

    The class body is empty: no-op ``visit_...`` and ``depart_...`` methods
    are attached dynamically by `_add_node_class_names()` for each name
    passed to it.
    """

2783

2784

class GenericNodeVisitor(NodeVisitor):
    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`, and
    each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`. `default_visit()` (and `default_departure()`) must
    be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only. Define semi-generic visitors by overriding
    individual ``visit_...()`` (and ``depart_...()``) methods also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.

    The per-node-class ``visit_...``/``depart_...`` methods are not written
    out here; they are attached by `_add_node_class_names()`.
    """

    def default_visit(self, node):
        """Override for generic, uniform traversals.

        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError

    def default_departure(self, node):
        """Override for generic, uniform traversals.

        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError

2809

2810

def _call_default_visit(self: GenericNodeVisitor, node) -> None:
    """Forward `node` to ``self.default_visit()``; the result is discarded.

    Installed as ``visit_...`` method on `GenericNodeVisitor` by
    `_add_node_class_names()`.
    """
    self.default_visit(node)

2813

2814

def _call_default_departure(self: GenericNodeVisitor, node) -> None:
    """Forward `node` to ``self.default_departure()``; the result is discarded.

    Installed as ``depart_...`` method on `GenericNodeVisitor` by
    `_add_node_class_names()`.
    """
    self.default_departure(node)

2817

2818

def _nop(self: SparseNodeVisitor, node) -> None:
    """Do nothing.

    Installed as both ``visit_...`` and ``depart_...`` method on
    `SparseNodeVisitor` by `_add_node_class_names()`.
    """
    pass

2821

2822

def _add_node_class_names(names) -> None:
    """Attach default ``visit_...``/``depart_...`` methods for `names`.

    For every name, `GenericNodeVisitor` gets methods forwarding to
    `default_visit()`/`default_departure()` and `SparseNodeVisitor`
    gets do-nothing methods.  Existing attributes of the same name
    are overwritten.
    """
    defaults = (
        (GenericNodeVisitor, _call_default_visit, _call_default_departure),
        (SparseNodeVisitor, _nop, _nop),
    )
    for class_name in names:
        for visitor_class, on_visit, on_departure in defaults:
            setattr(visitor_class, 'visit_' + class_name, on_visit)
            setattr(visitor_class, 'depart_' + class_name, on_departure)

2830

2831

# Executed at import time: equip `GenericNodeVisitor` and `SparseNodeVisitor`
# with default methods for every name in `node_class_names`
# (defined elsewhere in this module).
_add_node_class_names(node_class_names)

2833

2834

class TreeCopyVisitor(GenericNodeVisitor):
    """
    Make a complete copy of a tree or branch, including element attributes.

    Every visited node is duplicated with its ``copy()`` method and
    appended to the copy of its parent.  Both the visit and the
    departure methods must be called during the traversal; fetch the
    result with `get_tree_copy()` afterwards.
    """

    def __init__(self, document: document) -> None:
        super().__init__(document)
        # Containers that were "acting parent" before the current one.
        self.parent_stack: list[list] = []
        # Current acting parent; starts as a plain list that receives
        # the copy of the traversal's start node.
        self.parent: list = []

    def get_tree_copy(self):
        """Return the first item collected in the current acting parent."""
        return self.parent[0]

    def default_visit(self, node) -> None:
        """Copy `node`, attach the copy, and descend into it."""
        clone = node.copy()
        container = self.parent
        container.append(clone)
        self.parent_stack.append(container)
        self.parent = clone

    def default_departure(self, node) -> None:
        """Ascend: make the previous acting parent current again."""
        self.parent = self.parent_stack.pop()

2858

2859

2860# Custom Exceptions

2861# =================

2862

class ValidationError(ValueError):
    """Invalid Docutils Document Tree Element.

    `msg` is passed on to `ValueError`.  The optional
    `problematic_element` is stored in the attribute of the same name
    (``None`` if not given).
    """

    def __init__(self, msg: str,
                 problematic_element: Element | None = None) -> None:
        super().__init__(msg)
        self.problematic_element = problematic_element

2868

2869

class TreePruningException(Exception):
    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Raise subclasses from within ``visit_...`` or ``depart_...`` methods
    called from `Node.walk()` and `Node.walkabout()` tree traversals to prune
    the tree traversed.

    The class adds nothing to `Exception`; it only serves as a common base.
    """

2878

2879

class SkipChildren(TreePruningException):
    """
    Do not visit any children of the current node.

    The current node's siblings and its ``depart_...`` method are not
    affected.
    """

2885

2886

class SkipSiblings(TreePruningException):
    """
    Do not visit any more siblings (to the right) of the current node.

    The current node's children and its ``depart_...`` method are not
    affected.
    """

2892

2893

class SkipNode(TreePruningException):
    """
    Skip the rest of the current node.

    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """

2899

2900

class SkipDeparture(TreePruningException):
    """
    Do not call the current node's ``depart_...`` method.

    The current node's children and siblings are not affected.
    """

2906

2907

class NodeFound(TreePruningException):
    """
    Raise to indicate that the target of a search has been found.

    This exception must be caught by the client; it is not caught by the
    traversal code.
    """

2914

2915

class StopTraversal(TreePruningException):
    """
    Stop the traversal altogether.

    The current node's ``depart_...`` method is not affected.  The parent
    nodes' ``depart_...`` methods are also called as usual.  No other nodes
    are visited.  This is an alternative to `NodeFound` that does not cause
    exception handling to trickle up to the caller.
    """

2924

2925

2926# definition moved here from `utils` to avoid circular import dependency

def unescape(text: str,
             restore_backslashes: bool = False,
             respect_whitespace: bool = False,
             ) -> str:
    """
    Return `text` with its null characters removed or restored.

    If `restore_backslashes` is true, every null character is replaced
    by a backslash.  Otherwise null characters are dropped, and a space
    or newline immediately following a null is dropped with it.

    `respect_whitespace` is accepted but has no effect.
    """
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Order matters: null+whitespace pairs must go before lone nulls.
    return (text.replace('\x00 ', '')
                .replace('\x00\n', '')
                .replace('\x00', ''))

2942

2943

def make_id(string: str) -> str:
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    Background:

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

        ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
        followed by any number of letters, digits ([0-9]), hyphens ("-"),
        underscores ("_"), colons (":"), and periods (".").

    - The `CSS1 spec`_ is tighter; it defines identifiers based on the
      "name" token ("flex" tokenizer notation)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [¡-ÿ]
          escape      {unicode}|\\[ -~¡-ÿ]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    "nmchar" excludes underscores, colons, and periods, so these are
    replaced with hyphens.  Together with HTML's requirements (leading
    letter; no "unicode", "latin1", or "escape" characters) this gives
    the ``[a-z](-?[a-z0-9]+)*`` pattern.

    The result may be empty if `string` contains no usable characters.

    .. _HTML 4.01 spec: https://www.w3.org/TR/html401
    .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1
    """
    ident = string.lower()
    # Transliterate letters that NFKD cannot decompose:
    # two-letter replacements first, then single-letter ones.
    for table in (_non_id_translate_digraphs, _non_id_translate):
        ident = ident.translate(table)
    # Decompose accented characters, then discard all non-ASCII remains.
    decomposed = unicodedata.normalize('NFKD', ident)
    ident = decomposed.encode('ascii', 'ignore').decode('ascii')
    # Collapse whitespace, turn each run of invalid characters into a hyphen.
    ident = _non_id_chars.sub('-', ' '.join(ident.split()))
    # Strip leading hyphens/digits and trailing hyphens.
    return _non_id_at_ends.sub('', ident)

2990

2991

# Helper data for `make_id()`.

# One or more characters that are not allowed in an identifier.
_non_id_chars: re.Pattern[str] = re.compile('[^a-z0-9]+')
# Leading run of hyphens/digits or trailing run of hyphens.
_non_id_at_ends: re.Pattern[str] = re.compile('^[-0-9]+|-+$')
# `str.translate()` table (code point -> replacement): small letters
# that are mapped to a single ASCII letter.
_non_id_translate: dict[int, str] = {
    0x00f8: 'o',       # o with stroke
    0x0111: 'd',       # d with stroke
    0x0127: 'h',       # h with stroke
    0x0131: 'i',       # dotless i
    0x0142: 'l',       # l with stroke
    0x0167: 't',       # t with stroke
    0x0180: 'b',       # b with stroke
    0x0183: 'b',       # b with topbar
    0x0188: 'c',       # c with hook
    0x018c: 'd',       # d with topbar
    0x0192: 'f',       # f with hook
    0x0199: 'k',       # k with hook
    0x019a: 'l',       # l with bar
    0x019e: 'n',       # n with long right leg
    0x01a5: 'p',       # p with hook
    0x01ab: 't',       # t with palatal hook
    0x01ad: 't',       # t with hook
    0x01b4: 'y',       # y with hook
    0x01b6: 'z',       # z with stroke
    0x01e5: 'g',       # g with stroke
    0x0225: 'z',       # z with hook
    0x0234: 'l',       # l with curl
    0x0235: 'n',       # n with curl
    0x0236: 't',       # t with curl
    0x0237: 'j',       # dotless j
    0x023c: 'c',       # c with stroke
    0x023f: 's',       # s with swash tail
    0x0240: 'z',       # z with swash tail
    0x0247: 'e',       # e with stroke
    0x0249: 'j',       # j with stroke
    0x024b: 'q',       # q with hook tail
    0x024d: 'r',       # r with stroke
    0x024f: 'y',       # y with stroke
}
# `str.translate()` table: characters that are mapped to two ASCII letters.
# `make_id()` applies this table before `_non_id_translate`.
_non_id_translate_digraphs: dict[int, str] = {
    0x00df: 'sz',      # ligature sz
    0x00e6: 'ae',      # ae
    0x0153: 'oe',      # ligature oe
    0x0238: 'db',      # db digraph
    0x0239: 'qp',      # qp digraph
}

3036

3037

def dupname(node: Element, name: str) -> None:
    """Move `name` from ``node['names']`` to ``node['dupnames']``.

    Also flag `node` as referenced.  Raise `ValueError` (from
    ``list.remove()``) if `name` is not in ``node['names']``.
    """
    duplicates = node['dupnames']
    duplicates.append(name)
    active_names = node['names']
    active_names.remove(name)
    # Assume that `node` is referenced, even though it isn't;
    # we don't want to throw unnecessary system_messages.
    node.referenced = True

3044

3045

def fully_normalize_name(name: str) -> str:
    """Return `name` in lowercase with whitespace runs collapsed.

    Leading and trailing whitespace is removed; each internal run of
    whitespace becomes a single space.
    """
    words = name.lower().split()
    return ' '.join(words)

3049

3050

def whitespace_normalize_name(name: str) -> str:
    """Return `name` with whitespace runs collapsed; case is kept.

    Leading and trailing whitespace is removed; each internal run of
    whitespace becomes a single space.
    """
    words = name.split()
    return ' '.join(words)

3054

3055

def serial_escape(value: str) -> str:
    """Escape a string that is an element of a list, for serialization.

    Backslashes are doubled and spaces are prefixed with a backslash.
    """
    # Single pass: every backslash and every space gets its replacement.
    escapes = {ord('\\'): r'\\', ord(' '): r'\ '}
    return value.translate(escapes)

3059

3060

def split_name_list(s: str) -> list[str]:
    r"""Split `s` at non-escaped spaces and return the list of "names".

    A backslash escapes a following space (which then becomes part of
    the name) or backslash (cf. `serial_escape()`).  Escaping backslashes
    are removed from the result.

    >>> split_name_list(r'a\ n\ame two\\ n\\ames')
    ['a name', 'two\\', 'n\\ames']

    Provisional.
    """
    # Use NULL characters as temporary markers:
    marked = s.replace('\\', '\x00')               # backslash -> NULL
    marked = marked.replace('\x00\x00', '\\')      # NULL NULL -> literal "\"
    marked = marked.replace('\x00 ', '\x00\x00')   # escaped space -> NULL NULL
    names = []
    for chunk in marked.split(' '):
        # restore internal spaces, drop the remaining escape markers
        names.append(chunk.replace('\x00\x00', ' ').replace('\x00', ''))
    return names

3079

3080

def pseudo_quoteattr(value: str) -> str:
    """Quote attributes for pseudo-xml.

    Return `value` wrapped in double quotes.  No characters are escaped.
    """
    # An f-string (instead of ``'"%s"' % value``) avoids the %-operator's
    # special treatment of tuple operands.
    return f'"{value}"'

3084

3085

3086def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'

3087 ) -> tuple[int|float, str]:

3088 """Parse a measure__, return value + unit.

3089

3090 `unit_pattern` is a regular expression describing recognized units.

3091 The default is suited for (but not limited to) CSS3 units and SI units.

3092 It matches runs of ASCII letters or Greek mu, a single percent sign,

3093 or no unit.

3094

3095 __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure

3096

3097 Provisional.

3098 """

3099 match = re.fullmatch(f'(-?[0-9.]+) *({unit_pattern})', measure)

3100 try:

3101 try:

3102 value = int(match.group(1))

3103 except ValueError:

3104 value = float(match.group(1))

3105 unit = match.group(2)

3106 except (AttributeError, ValueError):

3107 raise ValueError(f'"{measure}" is no valid measure.')

3108 return value, unit

3109

3110

3111# Methods to validate `Element attribute`__ values.

3112

3113# Ensure the expected Python `data type`__, normalize, and check for

3114# restrictions.

3115#

3116# The methods can be used to convert `str` values (eg. from an XML

3117# representation) or to validate an existing document tree or node.

3118#

3119# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,

3120# and the `attribute_validating_functions` mapping below.

3121#

3122# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference

3123# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types

3124

def create_keyword_validator(*keywords: str) -> Callable[[str], str]:
    """
    Return a function that validates a `str` against given `keywords`.

    The returned function hands back its argument unchanged if it is one
    of the `keywords` and raises `ValueError` otherwise.

    Provisional.
    """
    def validate_keywords(value: str) -> str:
        if value in keywords:
            return value
        allowed = '", \"'.join(keywords)
        raise ValueError(f'"{value}" is not one of "{allowed}".')

    return validate_keywords

3137

3138

def validate_identifier(value: str) -> str:
    """
    Validate identifier key or class name.

    Return `value` if it is left unchanged by `make_id()`,
    raise `ValueError` otherwise.

    Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type

    Provisional.
    """
    if make_id(value) == value:
        return value
    raise ValueError(f'"{value}" is no valid id or class name.')

3152

3153

3154def validate_identifier_list(value: str | list[str]) -> list[str]:

3155 """

3156 A (space-separated) list of ids or class names.

3157

3158 `value` may be a `list` or a `str` with space separated

3159 ids or class names (cf. `validate_identifier()`).

3160

3161 Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.

3162

3163 __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type

3164 __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type

3165 __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type

3166

3167 Provisional.

3168 """

3169 if isinstance(value, str):

3170 value = value.split()

3171 for token in value:

3172 validate_identifier(token)

3173 return value

3174

3175

def validate_measure(measure: str) -> str:
    """
    Validate a measure__ (number + optional unit). Return normalized `str`.

    The result is the number directly followed by the unit, as returned
    by `parse_measure()` (the function to use if a "number + unit" tuple
    is wanted).

    The unit may be a run of ASCII letters or Greek mu, a single percent sign,
    or the empty string. Case is preserved.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
    """
    number, unit = parse_measure(measure)
    return str(number) + unit

3191

3192

3193def validate_colwidth(measure: str|int|float) -> int|float:

3194 """Validate the "colwidth__" attribute.

3195

3196 Provisional:

3197 `measure` must be a `str` and will be returned as normalized `str`

3198 (with unit "*" for proportional values) in Docutils 1.0.

3199

3200 The default unit will change to "pt" in Docutils 2.0.

3201

3202 __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth

3203 """

3204 if isinstance(measure, (int, float)):

3205 value = measure

3206 elif measure in ('*', ''): # short for '1*'

3207 value = 1

3208 else:

3209 try:

3210 value, _unit = parse_measure(measure, unit_pattern='[*]?')

3211 except ValueError:

3212 value = -1

3213 if value <= 0:

3214 raise ValueError(f'"{measure}" is no proportional measure.')

3215 return value

3216

3217

def validate_NMTOKEN(value: str) -> str:
    """
    Validate a "name token": a `str` of ASCII letters, digits, and [-._].

    Return `value` unchanged; raise `ValueError` if it is empty or
    contains other characters.

    Provisional.
    """
    if re.fullmatch('[-._A-Za-z0-9]+', value) is None:
        raise ValueError(f'"{value}" is no NMTOKEN.')
    return value

3227

3228

3229def validate_NMTOKENS(value: str | list[str]) -> list[str]:

3230 """

3231 Validate a list of "name tokens".

3232

3233 Provisional.

3234 """

3235 if isinstance(value, str):

3236 value = value.split()

3237 for token in value:

3238 validate_NMTOKEN(token)

3239 return value

3240

3241

3242def validate_refname_list(value: str | list[str]) -> list[str]:

3243 """

3244 Validate a list of `reference names`__.

3245

3246 Reference names may contain all characters;

3247 whitespace is normalized (cf, `whitespace_normalize_name()`).

3248

3249 `value` may be either a `list` of names or a `str` with

3250 space separated names (with internal spaces backslash escaped

3251 and literal backslashes doubled cf. `serial_escape()`).

3252

3253 Return a list of whitespace-normalized, unescaped reference names.

3254

3255 Provisional.

3256

3257 __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name

3258 """

3259 if isinstance(value, str):

3260 value = split_name_list(value)

3261 return [whitespace_normalize_name(name) for name in value]

3262

3263

3264def validate_yesorno(value: str | int | bool) -> bool:

3265 """Validate a `%yesorno`__ (flag) value.

3266

3267 The string literal "0" evaluates to ``False``, all other

3268 values are converterd with `bool()`.

3269

3270 __ https://docutils.sourceforge.io/docs/ref/doctree.html#yesorno

3271 """

3272 if value == "0":

3273 return False

3274 return bool(value)

3275

3276

3277ATTRIBUTE_VALIDATORS: dict[str, Callable[[str], Any]] = {

3278 'alt': str, # CDATA

3279 'align': str,

3280 'anonymous': validate_yesorno,

3281 'auto': str, # CDATA (only '1' or '*' are used in rST)

3282 'backrefs': validate_identifier_list,

3283 'bullet': str, # CDATA (only '-', '+', or '*' are used in rST)

3284 'classes': validate_identifier_list,

3285 'char': str, # from Exchange Table Model (CALS), currently ignored

3286 'charoff': validate_NMTOKEN, # from CALS, currently ignored

3287 'colname': validate_NMTOKEN, # from CALS, currently ignored

3288 'colnum': int, # from CALS, currently ignored

3289 'cols': int, # from CALS: "NMTOKEN, […] must be an integer > 0".

3290 'colsep': validate_yesorno,

3291 'colwidth': validate_colwidth, # see docstring for pending changes

3292 'content': str, # <meta>

3293 'delimiter': str,

3294 'dir': create_keyword_validator('ltr', 'rtl', 'auto'), # <meta>

3295 'dupnames': validate_refname_list,

3296 'enumtype': create_keyword_validator('arabic', 'loweralpha', 'lowerroman',

3297 'upperalpha', 'upperroman'),

3298 'format': str, # CDATA (space separated format names)

3299 'frame': create_keyword_validator('top', 'bottom', 'topbot', 'all',

3300 'sides', 'none'), # from CALS, ignored

3301 'height': validate_measure,

3302 'http-equiv': str, # <meta>

3303 'ids': validate_identifier_list,

3304 'lang': str, # <meta>

3305 'level': int,

3306 'line': int,

3307 'ltrim': validate_yesorno,

3308 'loading': create_keyword_validator('embed', 'link', 'lazy'),

3309 'media': str, # <meta>

3310 'morecols': int,

3311 'morerows': int,

3312 'name': whitespace_normalize_name, # in <reference> (deprecated)

3313 # 'name': node_attributes.validate_NMTOKEN, # in <meta>

3314 'names': validate_refname_list,

3315 'namest': validate_NMTOKEN, # start of span, from CALS, currently ignored

3316 'nameend': validate_NMTOKEN, # end of span, from CALS, currently ignored

3317 'pgwide': validate_yesorno, # from CALS, currently ignored

3318 'prefix': str,

3319 'refid': validate_identifier,

3320 'refname': whitespace_normalize_name,

3321 'refuri': str,

3322 'rowsep': validate_yesorno,

3323 'rtrim': validate_yesorno,

3324 'scale': int,

3325 'scheme': str,

3326 'source': str,

3327 'start': int,

3328 'stub': validate_yesorno,

3329 'suffix': str,

3330 'title': str,

3331 'type': validate_NMTOKEN,

3332 'uri': str,

3333 'valign': create_keyword_validator('top', 'middle', 'bottom'), # from CALS

3334 'width': validate_measure,

3335 'xml:space': create_keyword_validator('default', 'preserve'),

3336 }

3337"""

3338Mapping of `attribute names`__ to validating functions.

3339

3340Provisional.

3341

3342__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference

3343"""