Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/nodes.py: 62%

1# $Id$

2# Author: David Goodger <goodger@python.org>

3# Maintainer: docutils-develop@lists.sourceforge.net

4# Copyright: This module has been placed in the public domain.

6"""

7Docutils document tree element class library.

9The relationships and semantics of elements and attributes is documented in

10`The Docutils Document Tree`__.

12Classes in CamelCase are abstract base classes or auxiliary classes. The one

13exception is `Text`, for a text (PCDATA) node; uppercase is used to

14differentiate from element classes. Classes in lower_case_with_underscores

15are element classes, matching the XML element generic identifiers in the DTD_.

17The position of each node (the level at which it can occur) is significant and

18is represented by abstract base classes (`Root`, `Structural`, `Body`,

19`Inline`, etc.). Certain transformations will be easier because we can use

20``isinstance(node, base_class)`` to determine the position of the node in the

21hierarchy.

23__ https://docutils.sourceforge.io/docs/ref/doctree.html

24.. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd

25"""

27from __future__ import annotations

29__docformat__ = 'reStructuredText'

31import os

32import re

33import sys

34import unicodedata

35import warnings

36from collections import Counter

37# import xml.dom.minidom as dom # -> conditional import in Node.asdom()

38# and document.asdom()

40# import docutils.transforms # -> delayed import in document.__init__()

42TYPE_CHECKING = False

43if TYPE_CHECKING:

44 from collections.abc import (Callable, Iterable, Iterator,

45 Mapping, Sequence)

46 from types import ModuleType

47 from typing import Any, ClassVar, Final, Literal, Self, SupportsIndex

49 from docutils.utils._typing import TypeAlias

51 from xml.dom import minidom

53 from docutils.frontend import Values

54 from docutils.transforms import Transformer, Transform

55 from docutils.utils import Reporter

57 _ContentModelCategory: TypeAlias = tuple['Element' | tuple['Element', ...]]

58 _ContentModelQuantifier = Literal['.', '?', '+', '*']

59 _ContentModelItem: TypeAlias = tuple[_ContentModelCategory,

60 _ContentModelQuantifier]

61 _ContentModelTuple: TypeAlias = tuple[_ContentModelItem, ...]

63 StrPath: TypeAlias = str | os.PathLike[str]

64 """File system path. No bytes!"""

66 _UpdateFun: TypeAlias = Callable[[str, Any, bool], None]

69# ==============================

70# Functional Node Base Classes

71# ==============================

73class Node:

74 """Abstract base class of nodes in a document tree."""

76 parent: Element | None = None

77 """Back-reference to the Node immediately containing this Node."""

79 children: Sequence # defined in subclasses

80 """List of child nodes (Elements or Text).

82 Override in subclass instances that are not terminal nodes.

83 """

85 source: StrPath | None = None

86 """Path or description of the input source which generated this Node."""

88 line: int | None = None

89 """The line number (1-based) of the beginning of this Node in `source`."""

91 tagname: str # defined in subclasses

92 """The element generic identifier."""

94 _document: document | None = None

96 @property

97 def document(self) -> document | None:

98 """Return the `document` root node of the tree containing this Node.

99 """

100 try:

101 return self._document or self.parent.document

102 except AttributeError:

103 return None

104

105 @document.setter

106 def document(self, value: document) -> None:

107 self._document = value

108

109 def __bool__(self) -> Literal[True]:

110 """

111 Node instances are always true, even if they're empty. A node is more

112 than a simple container. Its boolean "truth" does not depend on

113 having one or more subnodes in the doctree.

114

115 Use `len()` to check node length.

116 """

117 return True

118

119 def asdom(self,

120 dom: ModuleType | None = None,

121 ) -> minidom.Document | minidom.Element | minidom.Text:

122 # TODO: minidom.Document is only returned by document.asdom()

123 # (which overwrites this base-class implementation)

124 """Return a DOM **fragment** representation of this Node."""

125 if dom is None:

126 import xml.dom.minidom as dom

127 domroot = dom.Document()

128 return self._dom_node(domroot)

129

130 def pformat(self, indent: str = ' ', level: int = 0) -> str:

131 """

132 Return an indented pseudo-XML representation, for test purposes.

133

134 Override in subclasses.

135 """

136 raise NotImplementedError

137

138 def copy(self) -> Self:

139 """Return a copy of self."""

140 raise NotImplementedError

141

142 def deepcopy(self) -> Self:

143 """Return a deep copy of self (also copying children)."""

144 raise NotImplementedError

145

146 def astext(self) -> str:

147 """Return a string representation of this Node."""

148 raise NotImplementedError

149

150 def setup_child(self, child) -> None:

151 child.parent = self

152 if self.document:

153 child.document = self.document

154 if child.source is None:

155 child.source = self.document.current_source

156 if child.line is None:

157 child.line = self.document.current_line

158

159 def walk(self, visitor: NodeVisitor) -> bool:

160 """

161 Traverse a tree of `Node` objects, calling the

162 `dispatch_visit()` method of `visitor` when entering each

163 node. (The `walkabout()` method is similar, except it also

164 calls the `dispatch_departure()` method before exiting each

165 node.)

166

167 This tree traversal supports limited in-place tree

168 modifications. Replacing one node with one or more nodes is

169 OK, as is removing an element. However, if the node removed

170 or replaced occurs after the current node, the old node will

171 still be traversed, and any new nodes will not.

172

173 Within ``visit`` methods (and ``depart`` methods for

174 `walkabout()`), `TreePruningException` subclasses may be raised

175 (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

176

177 Parameter `visitor`: A `NodeVisitor` object, containing a

178 ``visit`` implementation for each `Node` subclass encountered.

179

180 Return true if we should stop the traversal.

181 """

182 stop = False

183 visitor.document.reporter.debug(

184 'docutils.nodes.Node.walk calling dispatch_visit for %s'

185 % self.__class__.__name__)

186 try:

187 try:

188 visitor.dispatch_visit(self)

189 except (SkipChildren, SkipNode):

190 return stop

191 except SkipDeparture: # not applicable; ignore

192 pass

193 children = self.children

194 try:

195 for child in children[:]:

196 if child.walk(visitor):

197 stop = True

198 break

199 except SkipSiblings:

200 pass

201 except StopTraversal:

202 stop = True

203 return stop

204

205 def walkabout(self, visitor: NodeVisitor) -> bool:

206 """

207 Perform a tree traversal similarly to `Node.walk()` (which

208 see), except also call the `dispatch_departure()` method

209 before exiting each node.

210

211 Parameter `visitor`: A `NodeVisitor` object, containing a

212 ``visit`` and ``depart`` implementation for each `Node`

213 subclass encountered.

214

215 Return true if we should stop the traversal.

216 """

217 call_depart = True

218 stop = False

219 visitor.document.reporter.debug(

220 'docutils.nodes.Node.walkabout calling dispatch_visit for %s'

221 % self.__class__.__name__)

222 try:

223 try:

224 visitor.dispatch_visit(self)

225 except SkipNode:

226 return stop

227 except SkipDeparture:

228 call_depart = False

229 children = self.children

230 try:

231 for child in children[:]:

232 if child.walkabout(visitor):

233 stop = True

234 break

235 except SkipSiblings:

236 pass

237 except SkipChildren:

238 pass

239 except StopTraversal:

240 stop = True

241 if call_depart:

242 visitor.document.reporter.debug(

243 'docutils.nodes.Node.walkabout calling dispatch_departure '

244 'for %s' % self.__class__.__name__)

245 visitor.dispatch_departure(self)

246 return stop

247

248 def _fast_findall(self, cls: type) -> Iterator:

249 """Return iterator that only supports instance checks."""

250 if isinstance(self, cls):

251 yield self

252 for child in self.children:

253 yield from child._fast_findall(cls)

254

255 def _superfast_findall(self) -> Iterator:

256 """Return iterator that doesn't check for a condition."""

257 # This is different from ``iter(self)`` implemented via

258 # __getitem__() and __len__() in the Element subclass,

259 # which yields only the direct children.

260 yield self

261 for child in self.children:

262 yield from child._superfast_findall()

263

264 def findall(self,

265 condition: type | Callable[[Node], bool] | None = None,

266 include_self: bool = True,

267 descend: bool = True,

268 siblings: bool = False,

269 ascend: bool = False,

270 ) -> Iterator:

271 """

272 Return an iterator yielding nodes following `self`:

273

274 * self (if `include_self` is true)

275 * all descendants in tree traversal order (if `descend` is true)

276 * the following siblings (if `siblings` is true) and their

277 descendants (if also `descend` is true)

278 * the following siblings of the parent (if `ascend` is true) and

279 their descendants (if also `descend` is true), and so on.

280

281 If `condition` is not None, the iterator yields only nodes

282 for which ``condition(node)`` is true. If `condition` is a

283 type ``cls``, it is equivalent to a function consisting

284 of ``return isinstance(node, cls)``.

285

286 If `ascend` is true, assume `siblings` to be true as well.

287

288 If the tree structure is modified during iteration, the result

289 is undefined.

290

291 For example, given the following tree::

292

293 <paragraph>

294 <emphasis> <--- emphasis.traverse() and

295 <strong> <--- strong.traverse() are called.

296 Foo

297 Bar

298 <reference name="Baz" refid="baz">

299 Baz

300

301 Then tuple(emphasis.traverse()) equals ::

302

303 (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

304

305 and list(strong.traverse(ascend=True) equals ::

306

307 [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]

308 """

309 if ascend:

310 siblings = True

311 # Check for special argument combinations that allow using an

312 # optimized version of traverse()

313 if include_self and descend and not siblings:

314 if condition is None:

315 yield from self._superfast_findall()

316 return

317 elif isinstance(condition, type):

318 yield from self._fast_findall(condition)

319 return

320 # Check if `condition` is a class (check for TypeType for Python

321 # implementations that use only new-style classes, like PyPy).

322 if isinstance(condition, type):

323 node_class = condition

324

325 def condition(node, node_class=node_class):

326 return isinstance(node, node_class)

327

328 if include_self and (condition is None or condition(self)):

329 yield self

330 if descend and len(self.children):

331 for child in self:

332 yield from child.findall(condition=condition,

333 include_self=True, descend=True,

334 siblings=False, ascend=False)

335 if siblings or ascend:

336 node = self

337 while node.parent:

338 index = node.parent.index(node)

339 # extra check since Text nodes have value-equality

340 while node.parent[index] is not node:

341 index = node.parent.index(node, index + 1)

342 for sibling in node.parent[index+1:]:

343 yield from sibling.findall(

344 condition=condition,

345 include_self=True, descend=descend,

346 siblings=False, ascend=False)

347 if not ascend:

348 break

349 else:

350 node = node.parent

351

352 def traverse(self,

353 condition: type | Callable[[Node], bool] | None = None,

354 include_self: bool = True,

355 descend: bool = True,

356 siblings: bool = False,

357 ascend: bool = False,

358 ) -> list:

359 """Return list of nodes following `self`.

360

361 For looping, Node.findall() is faster and more memory efficient.

362 """

363 # traverse() may be eventually removed:

364 warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',

365 DeprecationWarning, stacklevel=2)

366 return list(self.findall(condition, include_self, descend,

367 siblings, ascend))

368

369 def next_node(self,

370 condition: type | Callable[[Node], bool] | None = None,

371 include_self: bool = False,

372 descend: bool = True,

373 siblings: bool = False,

374 ascend: bool = False,

375 ) -> Node | None:

376 """

377 Return the first node in the iterator returned by findall(),

378 or None if the iterable is empty.

379

380 Parameter list is the same as of `findall()`. Note that `include_self`

381 defaults to False, though.

382 """

383 try:

384 return next(self.findall(condition, include_self,

385 descend, siblings, ascend))

386 except StopIteration:

387 return None

388

389 def validate(self, recursive: bool = True) -> None:

390 """Raise ValidationError if this node is not valid.

391

392 Override in subclasses that define validity constraints.

393 """

394

395 def validate_position(self) -> None:

396 """Hook for additional checks of the parent's content model.

397

398 Raise ValidationError, if `self` is at an invalid position.

399

400 Override in subclasses with complex validity constraints. See

401 `subtitle.validate_position()` and `transition.validate_position()`.

402 """

403

404

405class Text(Node, str): # NoQA: SLOT000 (Node doesn't define __slots__)

406 """

407 Instances are terminal nodes (leaves) containing text only; no child

408 nodes or attributes. Initialize by passing a string to the constructor.

409

410 Access the raw (null-escaped) text with ``str(<instance>)``

411 and unescaped text with ``<instance>.astext()``.

412 """

413

414 tagname: Final = '#text'

415

416 children: Final = ()

417 """Text nodes have no children, and cannot have children."""

418

419 def __new__(cls, data: str, rawsource: None = None) -> Self:

420 """Assert that `data` is not an array of bytes

421 and warn if the deprecated `rawsource` argument is used.

422 """

423 if isinstance(data, bytes):

424 raise TypeError('expecting str data, not bytes')

425 if rawsource is not None:

426 warnings.warn('nodes.Text: initialization argument "rawsource" '

427 'is ignored and will be removed in Docutils 2.0.',

428 DeprecationWarning, stacklevel=2)

429 return str.__new__(cls, data)

430

431 def shortrepr(self, maxlen: int = 18) -> str:

432 data = self

433 if len(data) > maxlen:

434 data = data[:maxlen-4] + ' ...'

435 return '<%s: %r>' % (self.tagname, str(data))

436

437 def __repr__(self) -> str:

438 return self.shortrepr(maxlen=68)

439

440 def astext(self) -> str:

441 return str(unescape(self))

442

443 def _dom_node(self, domroot: minidom.Document) -> minidom.Text:

444 return domroot.createTextNode(str(self))

445

446 def copy(self) -> Self:

447 return self.__class__(str(self))

448

449 def deepcopy(self) -> Self:

450 return self.copy()

451

452 def pformat(self, indent: str = ' ', level: int = 0) -> str:

453 try:

454 if self.document.settings.detailed:

455 tag = '%s%s' % (indent*level, '<#text>')

456 lines = (indent*(level+1) + repr(line)

457 for line in self.splitlines(True))

458 return '\n'.join((tag, *lines)) + '\n'

459 except AttributeError:

460 pass

461 indent = indent * level

462 lines = [indent+line for line in self.astext().splitlines()]

463 if not lines:

464 return ''

465 return '\n'.join(lines) + '\n'

466

467 # rstrip and lstrip are used by substitution definitions where

468 # they are expected to return a Text instance, this was formerly

469 # taken care of by UserString.

470

471 def rstrip(self, chars: str | None = None) -> Self:

472 return self.__class__(str.rstrip(self, chars))

473

474 def lstrip(self, chars: str | None = None) -> Self:

475 return self.__class__(str.lstrip(self, chars))

476

477

478class Element(Node):

479 """

480 `Element` is the superclass to all specific elements.

481

482 Elements contain attributes and child nodes.

483 They can be described as a cross between a list and a dictionary.

484

485 Elements emulate dictionaries for external [#]_ attributes, indexing by

486 attribute name (a string). To set the attribute 'att' to 'value', do::

487

488 element['att'] = 'value'

489

490 .. [#] External attributes correspond to the XML element attributes.

491 From its `Node` superclass, Element also inherits "internal"

492 class attributes that are accessed using the standard syntax, e.g.

493 ``element.parent``.

494

495 There are two special attributes: 'ids' and 'names'. Both are

496 lists of unique identifiers: 'ids' conform to the regular expression

497 ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and

498 details). 'names' serve as user-friendly interfaces to IDs; they are

499 case- and whitespace-normalized (see the fully_normalize_name() function).

500

501 Elements emulate lists for child nodes (element nodes and/or text

502 nodes), indexing by integer. To get the first child node, use::

503

504 element[0]

505

506 to iterate over the child nodes (without descending), use::

507

508 for child in element:

509 ...

510

511 Elements may be constructed using the ``+=`` operator. To add one new

512 child node to element, do::

513

514 element += node

515

516 This is equivalent to ``element.append(node)``.

517

518 To add a list of multiple child nodes at once, use the same ``+=``

519 operator::

520

521 element += [node1, node2]

522

523 This is equivalent to ``element.extend([node1, node2])``.

524 """

525

526 list_attributes: Final = ('ids', 'classes', 'names', 'dupnames')

527 """Tuple of attributes that are initialized to empty lists.

528

529 NOTE: Derived classes should update this value when supporting

530 additional list attributes.

531 """

532

533 valid_attributes: Final = list_attributes + ('source',)

534 """Tuple of attributes that are valid for elements of this class.

535

536 NOTE: Derived classes should update this value when supporting

537 additional attributes.

538 """

539

540 common_attributes: Final = valid_attributes

541 """Tuple of `common attributes`__ known to all Doctree Element classes.

542

543 __ https://docutils.sourceforge.io/docs/ref/doctree.html#common-attributes

544 """

545

546 known_attributes: Final = common_attributes

547 """Alias for `common_attributes`. Will be removed in Docutils 2.0."""

548

549 basic_attributes: Final = list_attributes

550 """Common list attributes. Deprecated. Will be removed in Docutils 2.0."""

551

552 local_attributes: Final = ('backrefs',)

553 """Obsolete. Will be removed in Docutils 2.0."""

554

555 content_model: ClassVar[_ContentModelTuple] = ()

556 """Python representation of the element's content model (cf. docutils.dtd).

557

558 A tuple of ``(category, quantifier)`` tuples with

559

560 :category: class or tuple of classes that are expected at this place(s)

561 in the list of children

562 :quantifier: string representation stating how many elements

563 of `category` are expected. Value is one of:

564 '.' (exactly one), '?' (zero or one),

565 '+' (one or more), '*' (zero or more).

566

567 NOTE: The default describes the empty element. Derived classes should

568 update this value to match their content model.

569

570 Provisional.

571 """

572

573 tagname: str | None = None

574 """The element generic identifier.

575

576 If None, it is set as an instance attribute to the name of the class.

577 """

578

579 child_text_separator: Final = '\n\n'

580 """Separator for child nodes, used by `astext()` method."""

581

582 def __init__(self,

583 rawsource: str = '',

584 *children,

585 **attributes: Any,

586 ) -> None:

587 self.rawsource = rawsource

588 """The raw text from which this element was constructed.

589

590 For informative and debugging purposes. Don't rely on its value!

591

592 NOTE: some elements do not set this value (default '').

593 """

594 if isinstance(rawsource, Element):

595 raise TypeError('First argument "rawsource" must be a string.')

596

597 self.children: list = []

598 """List of child nodes (elements and/or `Text`)."""

599

600 self.extend(children) # maintain parent info

601

602 self.attributes: dict[str, Any] = {}

603 """Dictionary of attribute {name: value}."""

604

605 # Initialize list attributes.

606 for att in self.list_attributes:

607 self.attributes[att] = []

608

609 for att, value in attributes.items():

610 att = att.lower() # normalize attribute name

611 if att in self.list_attributes:

612 # lists are mutable; make a copy for this node

613 self.attributes[att] = value[:]

614 else:

615 self.attributes[att] = value

616

617 if self.tagname is None:

618 self.tagname: str = self.__class__.__name__

619

620 def _dom_node(self, domroot: minidom.Document) -> minidom.Element:

621 element = domroot.createElement(self.tagname)

622 for attribute, value in self.attlist():

623 if isinstance(value, list):

624 value = ' '.join(serial_escape('%s' % (v,)) for v in value)

625 element.setAttribute(attribute, '%s' % value)

626 for child in self.children:

627 element.appendChild(child._dom_node(domroot))

628 return element

629

630 def __repr__(self) -> str:

631 data = ''

632 for c in self.children:

633 data += c.shortrepr()

634 if len(data) > 60:

635 data = data[:56] + ' ...'

636 break

637 if self['names']:

638 return '<%s "%s": %s>' % (self.tagname,

639 '; '.join(self['names']), data)

640 else:

641 return '<%s: %s>' % (self.tagname, data)

642

643 def shortrepr(self) -> str:

644 if self['names']:

645 return '<%s "%s"...>' % (self.tagname, '; '.join(self['names']))

646 else:

647 return '<%s...>' % self.tagname

648

649 def __str__(self) -> str:

650 if self.children:

651 return '%s%s%s' % (self.starttag(),

652 ''.join(str(c) for c in self.children),

653 self.endtag())

654 else:

655 return self.emptytag()

656

657 def starttag(self, quoteattr: Callable[[str], str] | None = None) -> str:

658 # the optional arg is used by the docutils_xml writer

659 if quoteattr is None:

660 quoteattr = pseudo_quoteattr

661 parts = [self.tagname]

662 for name, value in self.attlist():

663 if value is None: # boolean attribute

664 parts.append('%s="True"' % name)

665 continue

666 if isinstance(value, bool):

667 value = str(int(value))

668 if isinstance(value, list):

669 values = [serial_escape('%s' % (v,)) for v in value]

670 value = ' '.join(values)

671 else:

672 value = str(value)

673 value = quoteattr(value)

674 parts.append('%s=%s' % (name, value))

675 return '<%s>' % ' '.join(parts)

676

677 def endtag(self) -> str:

678 return '</%s>' % self.tagname

679

680 def emptytag(self) -> str:

681 attributes = ('%s="%s"' % (n, v) for n, v in self.attlist())

682 return '<%s/>' % ' '.join((self.tagname, *attributes))

683

684 def __len__(self) -> int:

685 return len(self.children)

686

687 def __contains__(self, key) -> bool:

688 # Test for both, children and attributes with operator ``in``.

689 if isinstance(key, str):

690 return key in self.attributes

691 return key in self.children

692

693 def __getitem__(self, key: str | int | slice) -> Any:

694 if isinstance(key, str):

695 return self.attributes[key]

696 elif isinstance(key, int):

697 return self.children[key]

698 elif isinstance(key, slice):

699 assert key.step in (None, 1), 'cannot handle slice with stride'

700 return self.children[key.start:key.stop]

701 else:

702 raise TypeError('element index must be an integer, a slice, or '

703 'an attribute name string')

704

705 def __setitem__(self, key, item) -> None:

706 if isinstance(key, str):

707 self.attributes[str(key)] = item

708 elif isinstance(key, int):

709 self.setup_child(item)

710 self.children[key] = item

711 elif isinstance(key, slice):

712 assert key.step in (None, 1), 'cannot handle slice with stride'

713 for node in item:

714 self.setup_child(node)

715 self.children[key.start:key.stop] = item

716 else:

717 raise TypeError('element index must be an integer, a slice, or '

718 'an attribute name string')

719

720 def __delitem__(self, key: str | int | slice) -> None:

721 if isinstance(key, str):

722 del self.attributes[key]

723 elif isinstance(key, int):

724 del self.children[key]

725 elif isinstance(key, slice):

726 assert key.step in (None, 1), 'cannot handle slice with stride'

727 del self.children[key.start:key.stop]

728 else:

729 raise TypeError('element index must be an integer, a simple '

730 'slice, or an attribute name string')

731

732 def __add__(self, other: list) -> list:

733 return self.children + other

734

735 def __radd__(self, other: list) -> list:

736 return other + self.children

737

738 def __iadd__(self, other) -> Self:

739 """Append a node or a list of nodes to `self.children`."""

740 if isinstance(other, Node):

741 self.append(other)

742 elif other is not None:

743 self.extend(other)

744 return self

745

746 def astext(self) -> str:

747 return self.child_text_separator.join(

748 [child.astext() for child in self.children])

749

750 def non_default_attributes(self) -> dict[str, Any]:

751 atts = {key: value for key, value in self.attributes.items()

752 if self.is_not_default(key)}

753 return atts

754

755 def attlist(self) -> list[tuple[str, Any]]:

756 return sorted(self.non_default_attributes().items())

757

758 def get(self, key: str, failobj: Any | None = None) -> Any:

759 return self.attributes.get(key, failobj)

760

761 def hasattr(self, attr: str) -> bool:

762 return attr in self.attributes

763

764 def delattr(self, attr: str) -> None:

765 if attr in self.attributes:

766 del self.attributes[attr]

767

768 def setdefault(self, key: str, failobj: Any | None = None) -> Any:

769 return self.attributes.setdefault(key, failobj)

770

771 has_key = hasattr

772

773 def get_language_code(self, fallback: str = '') -> str:

774 """Return node's language tag.

775

776 Look iteratively in self and parents for a class argument

777 starting with ``language-`` and return the remainder of it

778 (which should be a `BCP49` language tag) or the `fallback`.

779 """

780 for cls in self.get('classes', []):

781 if cls.startswith('language-'):

782 return cls.removeprefix('language-')

783 try:

784 return self.parent.get_language_code(fallback)

785 except AttributeError:

786 return fallback

787

788 def append(self, item) -> None:

789 self.setup_child(item)

790 self.children.append(item)

791

792 def extend(self, item: Iterable) -> None:

793 for node in item:

794 self.append(node)

795

796 def insert(self, index: SupportsIndex, item) -> None:

797 if isinstance(item, Node):

798 self.setup_child(item)

799 self.children.insert(index, item)

800 elif item is not None:

801 self[index:index] = item

802

803 def pop(self, i: int = -1):

804 return self.children.pop(i)

805

806 def remove(self, item) -> None:

807 self.children.remove(item)

808

809 def index(self, item, start: int = 0, stop: int = sys.maxsize) -> int:

810 return self.children.index(item, start, stop)

811

812 def previous_sibling(self):

813 """Return preceding sibling node or ``None``."""

814 try:

815 i = self.parent.index(self)

816 except (AttributeError):

817 return None

818 return self.parent[i-1] if i > 0 else None

819

820 def section_hierarchy(self) -> list[section]:

821 """Return the element's section hierarchy.

822

823 Return a list of all <section> elements containing `self`

824 (including `self` if it is a <section>).

825

826 List item ``[i]`` is the parent <section> of level i+1

827 (1: section, 2: subsection, 3: subsubsection, ...).

828 The length of the list is the element's section level.

829

830 Provisional. May be changed or removed without warning.

831 """

832 sections = []

833 node = self

834 while node is not None:

835 if isinstance(node, section):

836 sections.append(node)

837 node = node.parent

838 sections.reverse()

839 return sections

840

841 def is_not_default(self, key: str) -> bool:

842 if self[key] == [] and key in self.list_attributes:

843 return False

844 else:

845 return True

846

847 def update_basic_atts(self, dict_: Mapping[str, Any] | Element) -> None:

848 """

849 Update basic attributes ('ids', 'names', 'classes',

850 'dupnames', but not 'source') from node or dictionary `dict_`.

851

852 Provisional.

853 """

854 if isinstance(dict_, Node):

855 dict_ = dict_.attributes

856 for att in self.basic_attributes:

857 self.append_attr_list(att, dict_.get(att, []))

858

859 def append_attr_list(self, attr: str, values: Iterable[Any]) -> None:

860 """

861 For each element in values, if it does not exist in self[attr], append

862 it.

863

864 NOTE: Requires self[attr] and values to be sequence type and the

865 former should specifically be a list.

866 """

867 # List Concatenation

868 for value in values:

869 if value not in self[attr]:

870 self[attr].append(value)

871

872 def coerce_append_attr_list(

873 self, attr: str, value: list[Any] | Any) -> None:

874 """

875 First, convert both self[attr] and value to a non-string sequence

876 type; if either is not already a sequence, convert it to a list of one

877 element. Then call append_attr_list.

878

879 NOTE: self[attr] and value both must not be None.

880 """

881 # List Concatenation

882 if not isinstance(self.get(attr), list):

883 self[attr] = [self[attr]]

884 if not isinstance(value, list):

885 value = [value]

886 self.append_attr_list(attr, value)

887

888 def replace_attr(self, attr: str, value: Any, force: bool = True) -> None:

889 """

890 If self[attr] does not exist or force is True or omitted, set

891 self[attr] to value, otherwise do nothing.

892 """

893 # One or the other

894 if force or self.get(attr) is None:

895 self[attr] = value

896

897 def copy_attr_convert(

898 self, attr: str, value: Any, replace: bool = True) -> None:

899 """

900 If attr is an attribute of self, set self[attr] to

901 [self[attr], value], otherwise set self[attr] to value.

902

903 NOTE: replace is not used by this function and is kept only for

904 compatibility with the other copy functions.

905 """

906 if self.get(attr) is not value:

907 self.coerce_append_attr_list(attr, value)

908

909 def copy_attr_coerce(self, attr: str, value: Any, replace: bool) -> None:

910 """

911 If attr is an attribute of self and either self[attr] or value is a

912 list, convert all non-sequence values to a sequence of 1 element and

913 then concatenate the two sequence, setting the result to self[attr].

914 If both self[attr] and value are non-sequences and replace is True or

915 self[attr] is None, replace self[attr] with value. Otherwise, do

916 nothing.

917 """

918 if self.get(attr) is not value:

919 if isinstance(self.get(attr), list) or \

920 isinstance(value, list):

921 self.coerce_append_attr_list(attr, value)

922 else:

923 self.replace_attr(attr, value, replace)

924

925 def copy_attr_concatenate(

926 self, attr: str, value: Any, replace: bool) -> None:

927 """

928 If attr is an attribute of self and both self[attr] and value are

929 lists, concatenate the two sequences, setting the result to

930 self[attr]. If either self[attr] or value are non-sequences and

931 replace is True or self[attr] is None, replace self[attr] with value.

932 Otherwise, do nothing.

933 """

934 if self.get(attr) is not value:

935 if isinstance(self.get(attr), list) and \

936 isinstance(value, list):

937 self.append_attr_list(attr, value)

938 else:

939 self.replace_attr(attr, value, replace)

940

941 def copy_attr_consistent(

942 self, attr: str, value: Any, replace: bool) -> None:

943 """

944 If replace is True or self[attr] is None, replace self[attr] with

945 value. Otherwise, do nothing.

946 """

947 if self.get(attr) is not value:

948 self.replace_attr(attr, value, replace)

949

950 def update_all_atts(self,

951 dict_: Mapping[str, Any] | Element,

952 update_fun: _UpdateFun = copy_attr_consistent,

953 replace: bool = True,

954 and_source: bool = False,

955 ) -> None:

956 """

957 Updates all attributes from node or dictionary `dict_`.

958

959 Appends the basic attributes ('ids', 'names', 'classes',

960 'dupnames', but not 'source') and then, for all other attributes in

961 dict_, updates the same attribute in self. When attributes with the

962 same identifier appear in both self and dict_, the two values are

963 merged based on the value of update_fun. Generally, when replace is

964 True, the values in self are replaced or merged with the values in

965 dict_; otherwise, the values in self may be preserved or merged. When

966 and_source is True, the 'source' attribute is included in the copy.

967

968 NOTE: When replace is False, and self contains a 'source' attribute,

969 'source' is not replaced even when dict_ has a 'source'

970 attribute, though it may still be merged into a list depending

971 on the value of update_fun.

972 NOTE: It is easier to call the update-specific methods then to pass

973 the update_fun method to this function.

974 """

975 if isinstance(dict_, Node):

976 dict_ = dict_.attributes

977

978 # Include the source attribute when copying?

979 if and_source:

980 filter_fun = self.is_not_list_attribute

981 else:

982 filter_fun = self.is_not_known_attribute

983

984 # Copy the basic attributes

985 self.update_basic_atts(dict_)

986

987 # Grab other attributes in dict_ not in self except the

988 # (All basic attributes should be copied already)

989 for att in filter(filter_fun, dict_):

990 update_fun(self, att, dict_[att], replace)

991

992 def update_all_atts_consistantly(self,

993 dict_: Mapping[str, Any] | Element,

994 replace: bool = True,

995 and_source: bool = False,

996 ) -> None:

997 """

998 Updates all attributes from node or dictionary `dict_`.

999

1000 Appends the basic attributes ('ids', 'names', 'classes',

1001 'dupnames', but not 'source') and then, for all other attributes in

1002 dict_, updates the same attribute in self. When attributes with the

1003 same identifier appear in both self and dict_ and replace is True, the

1004 values in self are replaced with the values in dict_; otherwise, the

1005 values in self are preserved. When and_source is True, the 'source'

1006 attribute is included in the copy.

1007

1008 NOTE: When replace is False, and self contains a 'source' attribute,

1009 'source' is not replaced even when dict_ has a 'source'

1010 attribute, though it may still be merged into a list depending

1011 on the value of update_fun.

1012 """

1013 self.update_all_atts(dict_, Element.copy_attr_consistent, replace,

1014 and_source)

1015

1016 def update_all_atts_concatenating(self,

1017 dict_: Mapping[str, Any] | Element,

1018 replace: bool = True,

1019 and_source: bool = False,

1020 ) -> None:

1021 """

1022 Updates all attributes from node or dictionary `dict_`.

1023

1024 Appends the basic attributes ('ids', 'names', 'classes',

1025 'dupnames', but not 'source') and then, for all other attributes in

1026 dict_, updates the same attribute in self. When attributes with the

1027 same identifier appear in both self and dict_ whose values aren't each

1028 lists and replace is True, the values in self are replaced with the

1029 values in dict_; if the values from self and dict_ for the given

1030 identifier are both of list type, then the two lists are concatenated

1031 and the result stored in self; otherwise, the values in self are

1032 preserved. When and_source is True, the 'source' attribute is

1033 included in the copy.

1034

1035 NOTE: When replace is False, and self contains a 'source' attribute,

1036 'source' is not replaced even when dict_ has a 'source'

1037 attribute, though it may still be merged into a list depending

1038 on the value of update_fun.

1039 """

1040 self.update_all_atts(dict_, Element.copy_attr_concatenate, replace,

1041 and_source)

1042

1043 def update_all_atts_coercion(self,

1044 dict_: Mapping[str, Any] | Element,

1045 replace: bool = True,

1046 and_source: bool = False,

1047 ) -> None:

1048 """

1049 Updates all attributes from node or dictionary `dict_`.

1050

1051 Appends the basic attributes ('ids', 'names', 'classes',

1052 'dupnames', but not 'source') and then, for all other attributes in

1053 dict_, updates the same attribute in self. When attributes with the

1054 same identifier appear in both self and dict_ whose values are both

1055 not lists and replace is True, the values in self are replaced with

1056 the values in dict_; if either of the values from self and dict_ for

1057 the given identifier are of list type, then first any non-lists are

1058 converted to 1-element lists and then the two lists are concatenated

1059 and the result stored in self; otherwise, the values in self are

1060 preserved. When and_source is True, the 'source' attribute is

1061 included in the copy.

1062

1063 NOTE: When replace is False, and self contains a 'source' attribute,

1064 'source' is not replaced even when dict_ has a 'source'

1065 attribute, though it may still be merged into a list depending

1066 on the value of update_fun.

1067 """

1068 self.update_all_atts(dict_, Element.copy_attr_coerce, replace,

1069 and_source)

1070

1071 def update_all_atts_convert(self,

1072 dict_: Mapping[str, Any] | Element,

1073 and_source: bool = False,

1074 ) -> None:

1075 """

1076 Updates all attributes from node or dictionary `dict_`.

1077

1078 Appends the basic attributes ('ids', 'names', 'classes',

1079 'dupnames', but not 'source') and then, for all other attributes in

1080 dict_, updates the same attribute in self. When attributes with the

1081 same identifier appear in both self and dict_ then first any non-lists

1082 are converted to 1-element lists and then the two lists are

1083 concatenated and the result stored in self; otherwise, the values in

1084 self are preserved. When and_source is True, the 'source' attribute

1085 is included in the copy.

1086

1087 NOTE: When replace is False, and self contains a 'source' attribute,

1088 'source' is not replaced even when dict_ has a 'source'

1089 attribute, though it may still be merged into a list depending

1090 on the value of update_fun.

1091 """

1092 self.update_all_atts(dict_, Element.copy_attr_convert,

1093 and_source=and_source)

1094

1095 def clear(self) -> None:

1096 self.children = []

1097

1098 def replace(self, old, new) -> None:

1099 """Replace one child `Node` with another child or children."""

1100 index = self.index(old)

1101 if isinstance(new, Node):

1102 self.setup_child(new)

1103 self[index] = new

1104 elif new is not None:

1105 self[index:index+1] = new

1106

1107 def replace_self(self, new) -> None:

1108 """

1109 Replace `self` node with `new`, where `new` is a node or a

1110 list of nodes.

1111

1112 Provisional: the handling of node attributes will be revised.

1113 """

1114 update = new

1115 if not isinstance(new, Node):

1116 # `new` is a list; update first child.

1117 try:

1118 update = new[0]

1119 except IndexError:

1120 update = None

1121 if isinstance(update, Element):

1122 update.update_basic_atts(self)

1123 else:

1124 # `update` is a Text node or `new` is an empty list.

1125 # Assert that we aren't losing any attributes.

1126 for att in self.basic_attributes:

1127 assert not self[att], \

1128 'Losing "%s" attribute: %s' % (att, self[att])

1129 self.parent.replace(self, new)

1130

1131 def first_child_matching_class(self,

1132 childclass: type[Element] | type[Text]

1133 | tuple[type[Element] | type[Text], ...],

1134 start: int = 0,

1135 end: int = sys.maxsize,

1136 ) -> int | None:

1137 """

1138 Return the index of the first child whose class exactly matches.

1139

1140 Parameters:

1141

1142 - `childclass`: A `Node` subclass to search for, or a tuple of `Node`

1143 classes. If a tuple, any of the classes may match.

1144 - `start`: Initial index to check.

1145 - `end`: Initial index to *not* check.

1146 """

1147 if not isinstance(childclass, tuple):

1148 childclass = (childclass,)

1149 for index in range(start, min(len(self), end)):

1150 for c in childclass:

1151 if isinstance(self[index], c):

1152 return index

1153 return None

1154

1155 def first_child_not_matching_class(

1156 self,

1157 childclass: type[Element] | type[Text]

1158 | tuple[type[Element] | type[Text], ...],

1159 start: int = 0,

1160 end: int = sys.maxsize,

1161 ) -> int | None:

1162 """

1163 Return the index of the first child whose class does *not* match.

1164

1165 Parameters:

1166

1167 - `childclass`: A `Node` subclass to skip, or a tuple of `Node`

1168 classes. If a tuple, none of the classes may match.

1169 - `start`: Initial index to check.

1170 - `end`: Initial index to *not* check.

1171 """

1172 if not isinstance(childclass, tuple):

1173 childclass = (childclass,)

1174 for index in range(start, min(len(self), end)):

1175 for c in childclass:

1176 if isinstance(self.children[index], c):

1177 break

1178 else:

1179 return index

1180 return None

1181

1182 def pformat(self, indent: str = ' ', level: int = 0) -> str:

1183 tagline = '%s%s\n' % (indent*level, self.starttag())

1184 childreps = (c.pformat(indent, level+1) for c in self.children)

1185 return ''.join((tagline, *childreps))

1186

1187 def copy(self) -> Self:

1188 obj = self.__class__(rawsource=self.rawsource, **self.attributes)

1189 obj._document = self._document

1190 obj.source = self.source

1191 obj.line = self.line

1192 return obj

1193

1194 def deepcopy(self) -> Self:

1195 copy = self.copy()

1196 copy.extend([child.deepcopy() for child in self.children])

1197 return copy

1198

1199 def note_referenced_by(self,

1200 name: str | None = None,

1201 id: str | None = None,

1202 ) -> None:

1203 """Note that this Element has been referenced by its name

1204 `name` or id `id`."""

1205 self.referenced = True

1206 # Element.expect_referenced_by_* dictionaries map names or ids

1207 # to nodes whose ``referenced`` attribute is set to true as

1208 # soon as this node is referenced by the given name or id.

1209 # Needed for target propagation.

1210 by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)

1211 by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)

1212 if by_name:

1213 assert name is not None

1214 by_name.referenced = True

1215 if by_id:

1216 assert id is not None

1217 by_id.referenced = True

1218

1219 @classmethod

1220 def is_not_list_attribute(cls, attr: str) -> bool:

1221 """

1222 Returns True if and only if the given attribute is NOT one of the

1223 basic list attributes defined for all Elements.

1224 """

1225 return attr not in cls.list_attributes

1226

1227 @classmethod

1228 def is_not_known_attribute(cls, attr: str) -> bool:

1229 """

1230 Return True if `attr` is NOT defined for all Element instances.

1231

1232 Provisional. May be removed in Docutils 2.0.

1233 """

1234 return attr not in cls.common_attributes

1235

1236 def validate_attributes(self) -> None:

1237 """Normalize and validate element attributes.

1238

1239 Convert string values to expected datatype.

1240 Normalize values.

1241

1242 Raise `ValidationError` for invalid attributes or attribute values.

1243

1244 Provisional.

1245 """

1246 messages = []

1247 for key, value in self.attributes.items():

1248 if key.startswith('internal:'):

1249 continue # see docs/user/config.html#expose-internals

1250 if key not in self.valid_attributes:

1251 va = '", "'.join(self.valid_attributes)

1252 messages.append(f'Attribute "{key}" not one of "{va}".')

1253 continue

1254 try:

1255 self.attributes[key] = ATTRIBUTE_VALIDATORS[key](value)

1256 except (ValueError, TypeError, KeyError) as e:

1257 messages.append(

1258 f'Attribute "{key}" has invalid value "{value}".\n {e}')

1259 if messages:

1260 raise ValidationError(f'Element {self.starttag()} invalid:\n '

1261 + '\n '.join(messages),

1262 problematic_element=self)

1263

1264 def validate_content(self,

1265 model: _ContentModelTuple | None = None,

1266 elements: Sequence | None = None,

1267 ) -> list:

1268 """Test compliance of `elements` with `model`.

1269

1270 :model: content model description, default `self.content_model`,

1271 :elements: list of doctree elements, default `self.children`.

1272

1273 Return list of children that do not fit in the model or raise

1274 `ValidationError` if the content does not comply with the `model`.

1275

1276 Provisional.

1277 """

1278 if model is None:

1279 model = self.content_model

1280 if elements is None:

1281 elements = self.children

1282 ichildren = iter(elements)

1283 child = next(ichildren, None)

1284 for category, quantifier in model:

1285 if not isinstance(child, category):

1286 if quantifier in ('.', '+'):

1287 raise ValidationError(self._report_child(child, category),

1288 problematic_element=child)

1289 else: # quantifier in ('?', '*') -> optional child

1290 continue # try same child with next part of content model

1291 else:

1292 # Check additional placement constraints (if applicable):

1293 child.validate_position()

1294 # advance:

1295 if quantifier in ('.', '?'): # go to next element

1296 child = next(ichildren, None)

1297 else: # if quantifier in ('*', '+'): # pass all matching elements

1298 for child in ichildren:

1299 if not isinstance(child, category):

1300 break

1301 try:

1302 child.validate_position()

1303 except AttributeError:

1304 pass

1305 else:

1306 child = None

1307 return [] if child is None else [child, *ichildren]

1308

1309 def _report_child(self,

1310 child,

1311 category: Element | Iterable[Element],

1312 ) -> str:

1313 # Return a str reporting a missing child or child of wrong category.

1314 try:

1315 _type = category.__name__

1316 except AttributeError:

1317 _type = '> or <'.join(c.__name__ for c in category)

1318 msg = f'Element {self.starttag()} invalid:\n'

1319 if child is None:

1320 return f'{msg} Missing child of type <{_type}>.'

1321 if isinstance(child, Text):

1322 return (f'{msg} Expecting child of type <{_type}>, '

1323 f'not text data "{child.astext()}".')

1324 return (f'{msg} Expecting child of type <{_type}>, '

1325 f'not {child.starttag()}.')

1326

1327 def validate(self, recursive: bool = True) -> None:

1328 """Validate Docutils Document Tree element ("doctree").

1329

1330 Raise ValidationError if there are violations.

1331 If `recursive` is True, validate also the element's descendants.

1332

1333 See `The Docutils Document Tree`__ for details of the

1334 Docutils Document Model.

1335

1336 __ https://docutils.sourceforge.io/docs/ref/doctree.html

1337

1338 Provisional (work in progress).

1339 """

1340 self.validate_attributes()

1341

1342 leftover_childs = self.validate_content()

1343 for child in leftover_childs:

1344 if isinstance(child, Text):

1345 raise ValidationError(f'Element {self.starttag()} invalid:\n'

1346 f' Spurious text: "{child.astext()}".',

1347 problematic_element=self)

1348 else:

1349 raise ValidationError(f'Element {self.starttag()} invalid:\n'

1350 f' Child element {child.starttag()} '

1351 'not allowed at this position.',

1352 problematic_element=child)

1353

1354 if recursive:

1355 for child in self:

1356 child.validate(recursive=recursive)

1357

1358

1359# ====================

1360# Element Categories

1361# ====================

1362#

1363# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-hierarchy.

1364

1365class Root:

1366 """Element at the root of a document tree."""

1367

1368

1369class Structural:

1370 """`Structural elements`__.

1371

1372 __ https://docutils.sourceforge.io/docs/ref/doctree.html

1373 #structural-elements

1374 """

1375

1376

1377class SubStructural:

1378 """`Structural subelements`__ are children of `Structural` elements.

1379

1380 Most Structural elements accept only specific `SubStructural` elements.

1381

1382 __ https://docutils.sourceforge.io/docs/ref/doctree.html

1383 #structural-subelements

1384 """

1385

1386

1387class Bibliographic:

1388 """`Bibliographic Elements`__ (displayed document meta-data).

1389

1390 __ https://docutils.sourceforge.io/docs/ref/doctree.html

1391 #bibliographic-elements

1392 """

1393

1394

1395class Body:

1396 """`Body elements`__.

1397

1398 __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-elements

1399 """

1400

1401

1402class Admonition(Body):

1403 """Admonitions (distinctive and self-contained notices)."""

1404 content_model: Final = ((Body, '+'),) # (%body.elements;)+

1405

1406

1407class Sequential(Body):

1408 """List-like body elements."""

1409

1410

1411class General(Body):

1412 """Miscellaneous body elements."""

1413

1414

1415class Special(Body):

1416 """Special internal body elements."""

1417

1418

1419class Part:

1420 """`Body Subelements`__ always occur within specific parent elements.

1421

1422 __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-subelements

1423 """

1424

1425

1426class Decorative:

1427 """Decorative elements (`header` and `footer`).

1428

1429 Children of `decoration`.

1430 """

1431 content_model: Final = ((Body, '+'),) # (%body.elements;)+

1432

1433

1434class Inline:

1435 """Inline elements contain text data and possibly other inline elements.

1436 """

1437

1438

1439# Orthogonal categories and Mixins

1440# ================================

1441

1442class PreBibliographic:

1443 """Elements which may occur before Bibliographic Elements."""

1444

1445

1446class Invisible(Special, PreBibliographic):

1447 """Internal elements that don't appear in output."""

1448

1449

1450class Labeled:

1451 """Contains a `label` as its first element."""

1452

1453

1454class Resolvable:

1455 resolved: bool = False

1456

1457

1458class BackLinkable:

1459 """Mixin for Elements that accept a "backrefs" attribute."""

1460

1461 list_attributes: Final = Element.list_attributes + ('backrefs',)

1462 valid_attributes: Final = Element.valid_attributes + ('backrefs',)

1463

1464 def add_backref(self: Element, refid: str) -> None:

1465 self['backrefs'].append(refid)

1466

1467

1468class Referential(Resolvable):

1469 """Elements holding a cross-reference (outgoing hyperlink)."""

1470

1471

1472class Targetable(Resolvable):

1473 """Cross-reference targets (incoming hyperlink)."""

1474 referenced: int = 0

1475

1476 indirect_reference_name: str | None = None

1477 """Holds the whitespace_normalized_name (contains mixed case) of a target.

1478

1479 This was required for MoinMoin <= 1.9 compatibility.

1480

1481 Deprecated, will be removed in Docutils 1.0.

1482 """

1483

1484

1485class Titular:

1486 """Title, sub-title, or informal heading (rubric)."""

1487

1488

1489class TextElement(Element):

1490 """

1491 An element which directly contains text.

1492

1493 Its children are all `Text` or `Inline` subclass nodes. You can

1494 check whether an element's context is inline simply by checking whether

1495 its immediate parent is a `TextElement` instance (including subclasses).

1496 This is handy for nodes like `image` that can appear both inline and as

1497 standalone body elements.

1498

1499 If passing children to `__init__()`, make sure to set `text` to

1500 ``''`` or some other suitable value.

1501 """

1502 content_model: Final = (((Text, Inline), '*'),)

1503 # (#PCDATA | %inline.elements;)*

1504

1505 child_text_separator: Final = ''

1506 """Separator for child nodes, used by `astext()` method."""

1507

1508 def __init__(self,

1509 rawsource: str = '',

1510 text: str = '',

1511 *children,

1512 **attributes: Any,

1513 ) -> None:

1514 if text:

1515 textnode = Text(text)

1516 Element.__init__(self, rawsource, textnode, *children,

1517 **attributes)

1518 else:

1519 Element.__init__(self, rawsource, *children, **attributes)

1520

1521

1522class FixedTextElement(TextElement):

1523 """An element which directly contains preformatted text."""

1524

1525 valid_attributes: Final = Element.valid_attributes + ('xml:space',)

1526

1527 def __init__(self,

1528 rawsource: str = '',

1529 text: str = '',

1530 *children,

1531 **attributes: Any,

1532 ) -> None:

1533 super().__init__(rawsource, text, *children, **attributes)

1534 self.attributes['xml:space'] = 'preserve'

1535

1536

1537class PureTextElement(TextElement):

1538 """An element which only contains text, no children."""

1539 content_model: Final = ((Text, '?'),) # (#PCDATA)

1540

1541

1542# =================================

1543# Concrete Document Tree Elements

1544# =================================

1545#

1546# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-reference

1547

1548# Decorative Elements

1549# ===================

1550

1551class header(Decorative, Element): pass

1552class footer(Decorative, Element): pass

1553

1554

1555# Structural Subelements

1556# ======================

1557

1558class title(Titular, PreBibliographic, SubStructural, TextElement):

1559 """Title of `document`, `section`, `topic` and generic `admonition`.

1560 """

1561 valid_attributes: Final = Element.valid_attributes + ('auto', 'refid')

1562

1563

1564class subtitle(Titular, PreBibliographic, SubStructural, TextElement):

1565 """Sub-title of `document`, `section` and `sidebar`."""

1566

1567 def validate_position(self) -> None:

1568 """Check position of subtitle: must follow a title."""

1569 if self.parent and self.parent.index(self) == 0:

1570 raise ValidationError(f'Element {self.parent.starttag()} invalid:'

1571 '\n <subtitle> only allowed after <title>.',

1572 problematic_element=self)

1573

1574

1575class meta(PreBibliographic, SubStructural, Element):

1576 """Container for "invisible" bibliographic data, or meta-data."""

1577 valid_attributes: Final = Element.valid_attributes + (

1578 'content', 'dir', 'http-equiv', 'lang', 'media', 'name', 'scheme')

1579

1580

1581class docinfo(SubStructural, Element):

1582 """Container for displayed document meta-data."""

1583 content_model: Final = ((Bibliographic, '+'),)

1584 # (%bibliographic.elements;)+

1585

1586

1587class decoration(PreBibliographic, SubStructural, Element):

1588 """Container for `header` and `footer`."""

1589 content_model: Final = ((header, '?'), # Empty element doesn't make sense,

1590 (footer, '?'), # but is simpler to define.

1591 )

1592 # (header?, footer?)

1593

1594 def get_header(self) -> header:

1595 if not len(self.children) or not isinstance(self.children[0], header):

1596 self.insert(0, header())

1597 return self.children[0]

1598

1599 def get_footer(self) -> footer:

1600 if not len(self.children) or not isinstance(self.children[-1], footer):

1601 self.append(footer())

1602 return self.children[-1]

1603

1604

1605class transition(SubStructural, Element):

1606 """Transitions__ are breaks between untitled text parts.

1607

1608 __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition

1609 """

1610

1611 def validate_position(self) -> None:

1612 """Check additional constraints on `transition` placement.

1613

1614 A transition may not begin or end a section or document,

1615 nor may two transitions be immediately adjacent.

1616 """

1617 messages = [f'Element {self.parent.starttag()} invalid:']

1618 predecessor = self.previous_sibling()

1619 if (predecessor is None # index == 0

1620 or isinstance(predecessor, (title, subtitle, meta, decoration))

1621 # A transition following these elements still counts as

1622 # "at the beginning of a document or section".

1623 ):

1624 messages.append(

1625 '<transition> may not begin a section or document.')

1626 if self.parent.index(self) == len(self.parent) - 1:

1627 messages.append('<transition> may not end a section or document.')

1628 if isinstance(predecessor, transition):

1629 messages.append(

1630 '<transition> may not directly follow another transition.')

1631 if len(messages) > 1:

1632 raise ValidationError('\n '.join(messages),

1633 problematic_element=self)

1634

1635

1636# Structural Elements

1637# ===================

1638

1639class topic(Structural, Element):

1640 """

1641 Topics__ are non-recursive, mini-sections.

1642

1643 __ https://docutils.sourceforge.io/docs/ref/doctree.html#topic

1644 """

1645 content_model: Final = ((title, '?'), (Body, '+'))

1646 # (title?, (%body.elements;)+)

1647

1648

1649class sidebar(Structural, Element):

1650 """

1651 Sidebars__ are like parallel documents providing related material.

1652

1653 A sidebar is typically offset by a border and "floats" to the side

1654 of the page

1655

1656 __ https://docutils.sourceforge.io/docs/ref/doctree.html#sidebar

1657 """

1658 content_model: Final = ((title, '?'),

1659 (subtitle, '?'),

1660 ((topic, Body), '+'),

1661 )

1662 # ((title, subtitle?)?, (%body.elements; | topic)+)

1663 # "subtitle only after title" is ensured in `subtitle.validate_position()`.

1664

1665

1666class section(Structural, Element):

1667 """Document section__. The main unit of hierarchy.

1668

1669 __ https://docutils.sourceforge.io/docs/ref/doctree.html#section

1670 """

1671 # recursive content model, see below

1672

1673

1674section.content_model = ((title, '.'),

1675 (subtitle, '?'),

1676 ((Body, topic, sidebar, transition), '*'),

1677 ((section, transition), '*'),

1678 )

1679# (title, subtitle?, %structure.model;)

1680# Correct transition placement is ensured in `transition.validate_position()`.

1681

1682

1683# Root Element

1684# ============

1685

1686class document(Root, Element):

1687 """

1688 The document root element.

1689

1690 Do not instantiate this class directly; use

1691 `docutils.utils.new_document()` instead.

1692 """

1693 valid_attributes: Final = Element.valid_attributes + ('title',)

1694 content_model: Final = ((title, '?'),

1695 (subtitle, '?'),

1696 (meta, '*'),

1697 (decoration, '?'),

1698 (docinfo, '?'),

1699 (transition, '?'),

1700 ((Body, topic, sidebar, transition), '*'),

1701 ((section, transition), '*'),

1702 )

1703 # ( (title, subtitle?)?,

1704 # meta*,

1705 # decoration?,

1706 # (docinfo, transition?)?,

1707 # %structure.model; )

1708 # Additional restrictions for `subtitle` and `transition` are tested

1709 # with the respective `validate_position()` methods.

1710

1711 def __init__(self,

1712 settings: Values,

1713 reporter: Reporter,

1714 *args,

1715 **kwargs: Any,

1716 ) -> None:

1717 Element.__init__(self, *args, **kwargs)

1718

1719 self.current_source: StrPath | None = None

1720 """Path to or description of the input source being processed."""

1721

1722 self.current_line: int | None = None

1723 """Line number (1-based) of `current_source`."""

1724

1725 self.settings: Values = settings

1726 """Runtime settings data record."""

1727

1728 self.reporter: Reporter = reporter

1729 """System message generator."""

1730

1731 self.indirect_targets: list[target] = []

1732 """List of indirect target nodes."""

1733

1734 self.substitution_defs: dict[str, substitution_definition] = {}

1735 """Mapping of substitution names to substitution_definition nodes."""

1736

1737 self.substitution_names: dict[str, str] = {}

1738 """Mapping of case-normalized to case-sensitive substitution names."""

1739

1740 self.refnames: dict[str, list[Element]] = {}

1741 """Mapping of names to lists of referencing nodes."""

1742

1743 self.refids: dict[str, list[Element]] = {}

1744 """Mapping of ids to lists of referencing nodes."""

1745

1746 self.nameids: dict[str, str] = {}

1747 """Mapping of names to unique id's."""

1748

1749 self.nametypes: dict[str, bool] = {}

1750 """Mapping of names to hyperlink type. True: explicit, False: implicit.

1751 """

1752

1753 self.ids: dict[str, Element] = {}

1754 """Mapping of ids to nodes."""

1755

1756 self.footnote_refs: dict[str, list[footnote_reference]] = {}

1757 """Mapping of footnote labels to lists of footnote_reference nodes."""

1758

1759 self.citation_refs: dict[str, list[citation_reference]] = {}

1760 """Mapping of citation labels to lists of citation_reference nodes."""

1761

1762 self.autofootnotes: list[footnote] = []

1763 """List of auto-numbered footnote nodes."""

1764

1765 self.autofootnote_refs: list[footnote_reference] = []

1766 """List of auto-numbered footnote_reference nodes."""

1767

1768 self.symbol_footnotes: list[footnote] = []

1769 """List of symbol footnote nodes."""

1770

1771 self.symbol_footnote_refs: list[footnote_reference] = []

1772 """List of symbol footnote_reference nodes."""

1773

1774 self.footnotes: list[footnote] = []

1775 """List of manually-numbered footnote nodes."""

1776

1777 self.citations: list[citation] = []

1778 """List of citation nodes."""

1779

1780 self.autofootnote_start: int = 1

1781 """Initial auto-numbered footnote number."""

1782

1783 self.symbol_footnote_start: int = 0

1784 """Initial symbol footnote symbol index."""

1785

1786 self.id_counter: Counter[int] = Counter()

1787 """Numbers added to otherwise identical IDs."""

1788

1789 self.parse_messages: list[system_message] = []

1790 """System messages generated while parsing."""

1791

1792 self.transform_messages: list[system_message] = []

1793 """System messages generated while applying transforms."""

1794

1795 import docutils.transforms

1796 self.transformer: Transformer = docutils.transforms.Transformer(self)

1797 """Storage for transforms to be applied to this document."""

1798

1799 self.include_log: list[tuple[StrPath, tuple]] = []

1800 """The current source's parents (to detect inclusion loops)."""

1801

1802 self.decoration: decoration | None = None

1803 """Document's `decoration` node."""

1804

1805 self._document: document = self

1806

1807 def __getstate__(self) -> dict[str, Any]:

1808 """

1809 Return dict with unpicklable references removed.

1810 """

1811 state = self.__dict__.copy()

1812 state['reporter'] = None

1813 state['transformer'] = None

1814 return state

1815

1816 def asdom(self, dom: ModuleType | None = None) -> minidom.Document:

1817 """Return a DOM representation of this document."""

1818 if dom is None:

1819 import xml.dom.minidom as dom

1820 domroot = dom.Document()

1821 domroot.appendChild(self._dom_node(domroot))

1822 return domroot

1823

1824 def set_id(self,

1825 node: Element,

1826 msgnode: Element | None = None,

1827 suggested_prefix: str = '',

1828 ) -> str:

1829 if node['ids']:

1830 # register and check for duplicates

1831 for id in node['ids']:

1832 self.ids.setdefault(id, node)

1833 if self.ids[id] is not node:

1834 msg = self.reporter.error(f'Duplicate ID: "{id}" used by '

1835 f'{self.ids[id].starttag()} '

1836 f'and {node.starttag()}',

1837 base_node=node)

1838 if msgnode is not None:

1839 msgnode += msg

1840 return id

1841 # generate and set id

1842 id_prefix = self.settings.id_prefix

1843 auto_id_prefix = self.settings.auto_id_prefix

1844 base_id = ''

1845 id = ''

1846 for name in node['names']:

1847 if id_prefix: # allow names starting with numbers

1848 base_id = make_id('x'+name)[1:]

1849 else:

1850 base_id = make_id(name)

1851 # TODO: normalize id-prefix? (would make code simpler)

1852 id = id_prefix + base_id

1853 if base_id and id not in self.ids:

1854 break

1855 else:

1856 if base_id and auto_id_prefix.endswith('%'):

1857 # disambiguate name-derived ID

1858 # TODO: remove second condition after announcing change

1859 prefix = id + '-'

1860 else:

1861 prefix = id_prefix + auto_id_prefix

1862 if prefix.endswith('%'):

1863 prefix = f"""{prefix[:-1]}{suggested_prefix

1864 or make_id(node.tagname)}-"""

1865 while True:

1866 self.id_counter[prefix] += 1

1867 id = f'{prefix}{self.id_counter[prefix]}'

1868 if id not in self.ids:

1869 break

1870 node['ids'].append(id)

1871 self.ids[id] = node

1872 return id

1873

1874 def set_name_id_map(self,

1875 node: Element,

1876 id: str,

1877 msgnode: Element | None = None,

1878 explicit: bool = False,

1879 ) -> None:

1880 """

1881 Update the name/id mappings.

1882

1883 `self.nameids` maps names to IDs. The value ``None`` indicates

1884 that the name is a "dupname" (i.e. there are already at least

1885 two targets with the same name and type).

1886

1887 `self.nametypes` maps names to booleans representing

1888 hyperlink target type (True==explicit, False==implicit).

1889

1890 The following state transition table shows how `self.nameids` items

1891 ("id") and `self.nametypes` items ("type") change with new input

1892 (a call to this method), and what actions are performed:

1893

1894 ======== ==== ======== ==== ======== ======== ======= ======

1895 Input Old State New State Action Notes

1896 -------- -------------- -------------- ---------------- ------

1897 type id type id type dupname report

1898 ======== ==== ======== ==== ======== ======== ======= ======

1899 explicit new explicit

1900 implicit new implicit

1901 explicit old explicit None explicit new,old WARNING [#ex]_

1902 implicit old explicit old explicit new INFO [#ex]_

1903 explicit old implicit new explicit old INFO [#ex]_

1904 implicit old implicit None implicit new,old INFO [#ex]_

1905 explicit None explicit None explicit new WARNING

1906 implicit None explicit None explicit new INFO

1907 explicit None implicit new explicit

1908 implicit None implicit None implicit new INFO

1909 ======== ==== ======== ==== ======== ======== ======= ======

1910

1911 .. [#] Do not clear the name-to-id map or invalidate the old target if

1912 both old and new targets refer to identical URIs or reference names.

1913 The new target is invalidated regardless.

1914 """

1915 for name in tuple(node['names']):

1916 if name in self.nameids:

1917 self.set_duplicate_name_id(node, id, name, msgnode, explicit)

1918 # attention: modifies node['names']

1919 else:

1920 self.nameids[name] = id

1921 self.nametypes[name] = explicit

1922

1923 def set_duplicate_name_id(self,

1924 node: Element,

1925 id: str,

1926 name: str,

1927 msgnode: Element,

1928 explicit: bool,

1929 ) -> None:

1930 old_id = self.nameids[name] # None if name is only dupname

1931 old_explicit = self.nametypes[name]

1932 old_node = self.ids.get(old_id)

1933 level = 0 # system message level: 1-info, 2-warning

1934

1935 self.nametypes[name] = old_explicit or explicit

1936

1937 if old_id is not None and (

1938 'refname' in node and node['refname'] == old_node.get('refname')

1939 or 'refuri' in node and node['refuri'] == old_node.get('refuri')

1940 ):

1941 # indirect targets with same reference -> keep old target

1942 level = 1

1943 ref = node.get('refuri') or node.get('refname')

1944 s = f'Duplicate name "{name}" for external target "{ref}".'

1945 dupname(node, name)

1946 elif explicit:

1947 if old_explicit:

1948 level = 2

1949 s = f'Duplicate explicit target name: "{name}".'

1950 dupname(node, name)

1951 if old_id is not None:

1952 dupname(old_node, name)

1953 self.nameids[name] = None

1954 else: # new explicit, old implicit -> override

1955 self.nameids[name] = id

1956 if old_id is not None:

1957 level = 1

1958 s = f'Target name overrides implicit target name "{name}".'

1959 dupname(old_node, name)

1960 else: # new name is implicit

1961 level = 1

1962 s = f'Duplicate implicit target name: "{name}".'

1963 dupname(node, name)

1964 if old_id is not None and not old_explicit:

1965 dupname(old_node, name)

1966 self.nameids[name] = None

1967

1968 if level:

1969 backrefs = [id]

1970 # don't add backref id for empty targets (not shown in output)

1971 if isinstance(node, target) and 'refuri' in node:

1972 backrefs = []

1973 msg = self.reporter.system_message(level, s,

1974 backrefs=backrefs,

1975 base_node=node)

1976 # try appending near to the problem:

1977 if msgnode is not None:

1978 msgnode += msg

1979 try:

1980 msgnode.validate(recursive=False)

1981 except ValidationError:

1982 # detach -> will be handled by `Messages` transform

1983 msgnode.pop()

1984 msg.parent = None

1985

1986 def has_name(self, name: str) -> bool:

1987 return name in self.nameids

1988

1989 # "note" here is an imperative verb: "take note of".

1990 def note_implicit_target(

1991 self, target: Element, msgnode: Element | None = None) -> None:

1992 id = self.set_id(target, msgnode)

1993 self.set_name_id_map(target, id, msgnode, explicit=False)

1994

1995 def note_explicit_target(

1996 self, target: Element, msgnode: Element | None = None) -> None:

1997 id = self.set_id(target, msgnode)

1998 self.set_name_id_map(target, id, msgnode, explicit=True)

1999

2000 def note_refname(self, node: Element) -> None:

2001 self.refnames.setdefault(node['refname'], []).append(node)

2002

2003 def note_refid(self, node: Element) -> None:

2004 self.refids.setdefault(node['refid'], []).append(node)

2005

2006 def note_indirect_target(self, target: target) -> None:

2007 self.indirect_targets.append(target)

2008 if target['names']:

2009 self.note_refname(target)

2010

2011 def note_anonymous_target(self, target: target) -> None:

2012 self.set_id(target)

2013

2014 def note_autofootnote(self, footnote: footnote) -> None:

2015 self.set_id(footnote)

2016 self.autofootnotes.append(footnote)

2017

2018 def note_autofootnote_ref(self, ref: footnote_reference) -> None:

2019 self.set_id(ref)

2020 self.autofootnote_refs.append(ref)

2021

2022 def note_symbol_footnote(self, footnote: footnote) -> None:

2023 self.set_id(footnote)

2024 self.symbol_footnotes.append(footnote)

2025

2026 def note_symbol_footnote_ref(self, ref: footnote_reference) -> None:

2027 self.set_id(ref)

2028 self.symbol_footnote_refs.append(ref)

2029

2030 def note_footnote(self, footnote: footnote) -> None:

2031 self.set_id(footnote)

2032 self.footnotes.append(footnote)

2033

2034 def note_footnote_ref(self, ref: footnote_reference) -> None:

2035 self.set_id(ref)

2036 self.footnote_refs.setdefault(ref['refname'], []).append(ref)

2037 self.note_refname(ref)

2038

2039 def note_citation(self, citation: citation) -> None:

2040 self.citations.append(citation)

2041

2042 def note_citation_ref(self, ref: citation_reference) -> None:

2043 self.set_id(ref)

2044 self.citation_refs.setdefault(ref['refname'], []).append(ref)

2045 self.note_refname(ref)

2046

2047 def note_substitution_def(self,

2048 subdef: substitution_definition,

2049 def_name: str,

2050 msgnode: Element | None = None,

2051 ) -> None:

2052 name = whitespace_normalize_name(def_name)

2053 if name in self.substitution_defs:

2054 msg = self.reporter.error(

2055 'Duplicate substitution definition name: "%s".' % name,

2056 base_node=subdef)

2057 if msgnode is not None:

2058 msgnode += msg

2059 oldnode = self.substitution_defs[name]

2060 dupname(oldnode, name)

2061 # keep only the last definition:

2062 self.substitution_defs[name] = subdef

2063 # case-insensitive mapping:

2064 self.substitution_names[fully_normalize_name(name)] = name

2065

2066 def note_substitution_ref(self,

2067 subref: substitution_reference,

2068 refname: str,

2069 ) -> None:

2070 subref['refname'] = whitespace_normalize_name(refname)

2071

2072 def note_pending(

2073 self, pending: pending, priority: int | None = None) -> None:

2074 self.transformer.add_pending(pending, priority)

2075

2076 def note_parse_message(self, message: system_message) -> None:

2077 self.parse_messages.append(message)

2078

2079 def note_transform_message(self, message: system_message) -> None:

2080 self.transform_messages.append(message)

2081

2082 def note_source(self,

2083 source: StrPath | None,

2084 offset: int | None,

2085 ) -> None:

2086 self.current_source = source and os.fspath(source)

2087 if offset is None:

2088 self.current_line = offset

2089 else:

2090 self.current_line = offset + 1

2091

2092 def copy(self) -> Self:

2093 obj = self.__class__(self.settings, self.reporter,

2094 **self.attributes)

2095 obj.source = self.source

2096 obj.line = self.line

2097 return obj

2098

2099 def get_decoration(self) -> decoration:

2100 if not self.decoration:

2101 self.decoration: decoration = decoration()

2102 index = self.first_child_not_matching_class((Titular, meta))

2103 if index is None:

2104 self.append(self.decoration)

2105 else:

2106 self.insert(index, self.decoration)

2107 return self.decoration

2108

2109

2110# Bibliographic Elements

2111# ======================

2112

2113class author(Bibliographic, TextElement): pass

2114class organization(Bibliographic, TextElement): pass

2115class address(Bibliographic, FixedTextElement): pass

2116class contact(Bibliographic, TextElement): pass

2117class version(Bibliographic, TextElement): pass

2118class revision(Bibliographic, TextElement): pass

2119class status(Bibliographic, TextElement): pass

2120class date(Bibliographic, TextElement): pass

2121class copyright(Bibliographic, TextElement): pass # NoQA: A001 (builtin name)

2122

2123

2124class authors(Bibliographic, Element):

2125 """Container for author information for documents with multiple authors.

2126 """

2127 content_model: Final = ((author, '+'),

2128 (organization, '?'),

2129 (address, '?'),

2130 (contact, '?'),

2131 )

2132 # (author, organization?, address?, contact?)+

2133

2134 def validate_content(self,

2135 model: _ContentModelTuple | None = None,

2136 elements: Sequence | None = None,

2137 ) -> list:

2138 """Repeatedly test for children matching the content model.

2139

2140 Provisional.

2141 """

2142 relics = super().validate_content()

2143 while relics:

2144 relics = super().validate_content(elements=relics)

2145 return relics

2146

2147

2148# Body Elements

2149# =============

2150#

2151# General

2152# -------

2153#

2154# Miscellaneous Body Elements and related Body Subelements (Part)

2155

2156class paragraph(General, TextElement): pass

2157class rubric(Titular, General, TextElement): pass

2158

2159

2160class compound(General, Element):

2161 content_model: Final = ((Body, '+'),) # (%body.elements;)+

2162

2163

2164class container(General, Element):

2165 content_model: Final = ((Body, '+'),) # (%body.elements;)+

2166

2167

2168class attribution(Part, TextElement):

2169 """Visible reference to the source of a `block_quote`."""

2170

2171

2172class block_quote(General, Element):

2173 """An extended quotation, set off from the main text."""

2174 content_model: Final = ((Body, '+'), (attribution, '?'))

2175 # ((%body.elements;)+, attribution?)

2176

2177

2178class reference(General, Inline, Referential, TextElement):

2179 valid_attributes: Final = Element.valid_attributes + (

2180 'anonymous', 'name', 'refid', 'refname', 'refuri')

2181

2182

2183# Lists

2184# -----

2185#

2186# Lists (Sequential) and related Body Subelements (Part)

2187

2188class list_item(Part, Element):

2189 content_model: Final = ((Body, '*'),) # (%body.elements;)*

2190

2191

2192class bullet_list(Sequential, Element):

2193 valid_attributes: Final = Element.valid_attributes + ('bullet',)

2194 content_model: Final = ((list_item, '+'),) # (list_item+)

2195

2196

2197class enumerated_list(Sequential, Element):

2198 valid_attributes: Final = Element.valid_attributes + (

2199 'enumtype', 'prefix', 'suffix', 'start')

2200 content_model: Final = ((list_item, '+'),) # (list_item+)

2201

2202

2203class term(Part, TextElement): pass

2204class classifier(Part, TextElement): pass

2205

2206

2207class definition(Part, Element):

2208 """Definition of a `term` in a `definition_list`."""

2209 content_model: Final = ((Body, '+'),) # (%body.elements;)+

2210

2211

2212class definition_list_item(Part, Element):

2213 content_model: Final = ((term, '.'),

2214 ((classifier, term), '*'),

2215 (definition, '.'),

2216 )

2217 # ((term, classifier*)+, definition)

2218

2219

2220class definition_list(Sequential, Element):

2221 """List of terms and their definitions.

2222

2223 Can be used for glossaries or dictionaries, to describe or

2224 classify things, for dialogues, or to itemize subtopics.

2225 """

2226 content_model: Final = ((definition_list_item, '+'),)

2227 # (definition_list_item+)

2228

2229

2230class field_name(Part, TextElement): pass

2231

2232

2233class field_body(Part, Element):

2234 content_model: Final = ((Body, '*'),) # (%body.elements;)*

2235

2236

2237class field(Part, Bibliographic, Element):

2238 content_model: Final = ((field_name, '.'), (field_body, '.'))

2239 # (field_name, field_body)

2240

2241

2242class field_list(Sequential, Element):

2243 """List of label & data pairs.

2244

2245 Typically rendered as a two-column list.

2246 Also used for extension syntax or special processing.

2247 """

2248 content_model: Final = ((field, '+'),) # (field+)

2249

2250

2251class option_string(Part, PureTextElement):

2252 """A literal command-line option. Typically monospaced."""

2253

2254

2255class option_argument(Part, PureTextElement):

2256 """Placeholder text for option arguments."""

2257 valid_attributes: Final = Element.valid_attributes + ('delimiter',)

2258

2259 def astext(self) -> str:

2260 return self.get('delimiter', ' ') + TextElement.astext(self)

2261

2262

2263class option(Part, Element):

2264 """Option element in an `option_list_item`.

2265

2266 Groups an option string with zero or more option argument placeholders.

2267 """

2268 child_text_separator: Final = ''

2269 content_model: Final = ((option_string, '.'), (option_argument, '*'))

2270 # (option_string, option_argument*)

2271

2272

2273class option_group(Part, Element):

2274 """Groups together one or more `option` elements, all synonyms."""

2275 child_text_separator: Final = ', '

2276 content_model: Final = ((option, '+'),) # (option+)

2277

2278

2279class description(Part, Element):

2280 """Describtion of a command-line option."""

2281 content_model: Final = ((Body, '+'),) # (%body.elements;)+

2282

2283

2284class option_list_item(Part, Element):

2285 """Container for a pair of `option_group` and `description` elements.

2286 """

2287 child_text_separator: Final = ' '

2288 content_model: Final = ((option_group, '.'), (description, '.'))

2289 # (option_group, description)

2290

2291

2292class option_list(Sequential, Element):

2293 """Two-column list of command-line options and descriptions."""

2294 content_model: Final = ((option_list_item, '+'),) # (option_list_item+)

2295

2296

2297# Pre-formatted text blocks

2298# -------------------------

2299

2300class literal_block(General, FixedTextElement): pass

2301class doctest_block(General, FixedTextElement): pass

2302

2303

2304class math_block(General, FixedTextElement, PureTextElement):

2305 """Mathematical notation (display formula)."""

2306

2307

2308class line(Part, TextElement):

2309 """Single line of text in a `line_block`."""

2310 indent: str | None = None

2311

2312

2313class line_block(General, Element):

2314 """Sequence of lines and nested line blocks.

2315 """

2316 # recursive content model: (line | line_block)+

2317

2318

2319line_block.content_model = (((line, line_block), '+'),)

2320

2321

2322# Admonitions

2323# -----------

2324# distinctive and self-contained notices

2325

2326class attention(Admonition, Element): pass

2327class caution(Admonition, Element): pass

2328class danger(Admonition, Element): pass

2329class error(Admonition, Element): pass

2330class important(Admonition, Element): pass

2331class note(Admonition, Element): pass

2332class tip(Admonition, Element): pass

2333class hint(Admonition, Element): pass

2334class warning(Admonition, Element): pass

2335

2336

2337class admonition(Admonition, Element):

2338 content_model: Final = ((title, '.'), (Body, '+'))

2339 # (title, (%body.elements;)+)

2340

2341

2342# Footnote and citation

2343# ---------------------

2344

2345class label(Part, PureTextElement):

2346 """Visible identifier for footnotes and citations."""

2347

2348

2349class footnote(General, BackLinkable, Element, Labeled, Targetable):

2350 """Labelled note providing additional context (footnote or endnote)."""

2351 valid_attributes: Final = Element.valid_attributes + ('auto', 'backrefs')

2352 content_model: Final = ((label, '?'), (Body, '+'))

2353 # (label?, (%body.elements;)+)

2354 # The label will become required in Docutils 1.0.

2355

2356

2357class citation(General, BackLinkable, Element, Labeled, Targetable):

2358 content_model: Final = ((label, '.'), (Body, '+'))

2359 # (label, (%body.elements;)+)

2360

2361

2362# Graphical elements

2363# ------------------

2364

2365class image(General, Inline, Element):

2366 """Reference to an image resource.

2367

2368 May be body element or inline element.

2369 """

2370 valid_attributes: Final = Element.valid_attributes + (

2371 'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading')

2372

2373 def astext(self) -> str:

2374 return self.get('alt', '')

2375

2376

2377class caption(Part, TextElement): pass

2378

2379

2380class legend(Part, Element):

2381 """A wrapper for text accompanying a `figure` that is not the caption."""

2382 content_model: Final = ((Body, '+'),) # (%body.elements;)+

2383

2384

2385class figure(General, Element):

2386 """A formal figure, generally an illustration, with a title."""

2387 valid_attributes: Final = Element.valid_attributes + ('align', 'width')

2388 content_model: Final = (((image, reference), '.'),

2389 (caption, '?'),

2390 (legend, '?'),

2391 )

2392 # (image, ((caption, legend?) | legend))

2393 # TODO: According to the DTD, a caption or legend is required

2394 # but rST allows "bare" figures which are formatted differently from

2395 # images (floating in LaTeX, nested in a <figure> in HTML). [bugs: #489]

2396

2397

2398# Tables

2399# ------

2400

2401class entry(Part, Element):

2402 """An entry in a `row` (a table cell)."""

2403 valid_attributes: Final = Element.valid_attributes + (

2404 'align', 'char', 'charoff', 'colname', 'colsep', 'morecols',

2405 'morerows', 'namest', 'nameend', 'rowsep', 'valign')

2406 content_model: Final = ((Body, '*'),)

2407 # %tbl.entry.mdl -> (%body.elements;)*

2408

2409

2410class row(Part, Element):

2411 """Row of table cells."""

2412 valid_attributes: Final = Element.valid_attributes + ('rowsep', 'valign')

2413 content_model: Final = ((entry, '+'),) # (%tbl.row.mdl;) -> entry+

2414

2415

2416class colspec(Part, Element):

2417 """Specifications for a column in a `tgroup`."""

2418 valid_attributes: Final = Element.valid_attributes + (

2419 'align', 'char', 'charoff', 'colname', 'colnum',

2420 'colsep', 'colwidth', 'rowsep', 'stub')

2421

2422 def propwidth(self) -> int|float:

2423 """Return numerical value of "colwidth__" attribute. Default 1.

2424

2425 Raise ValueError if "colwidth" is zero, negative, or a *fixed value*.

2426

2427 Provisional.

2428

2429 __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth

2430 """

2431 # Move current implementation of validate_colwidth() here

2432 # in Docutils 1.0

2433 return validate_colwidth(self.get('colwidth', ''))

2434

2435

2436class thead(Part, Element):

2437 """Row(s) that form the head of a `tgroup`."""

2438 valid_attributes: Final = Element.valid_attributes + ('valign',)

2439 content_model: Final = ((row, '+'),) # (row+)

2440

2441

2442class tbody(Part, Element):

2443 """Body of a `tgroup`."""

2444 valid_attributes: Final = Element.valid_attributes + ('valign',)

2445 content_model: Final = ((row, '+'),) # (row+)

2446

2447

2448class tgroup(Part, Element):

2449 """A portion of a table. Most tables have just one `tgroup`."""

2450 valid_attributes: Final = Element.valid_attributes + (

2451 'align', 'cols', 'colsep', 'rowsep')

2452 content_model: Final = ((colspec, '*'), (thead, '?'), (tbody, '.'))

2453 # (colspec*, thead?, tbody)

2454

2455

2456class table(General, Element):

2457 """A data arrangement with rows and columns."""

2458 valid_attributes: Final = Element.valid_attributes + (

2459 'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width')

2460 content_model: Final = ((title, '?'), (tgroup, '+'))

2461 # (title?, tgroup+)

2462

2463

2464# Special purpose elements

2465# ------------------------

2466# Body elements for internal use or special requests.

2467

2468class comment(Invisible, FixedTextElement, PureTextElement):

2469 """Author notes, hidden from the output."""

2470

2471

2472class substitution_definition(Invisible, TextElement):

2473 valid_attributes: Final = Element.valid_attributes + ('ltrim', 'rtrim')

2474

2475

2476class target(Invisible, Inline, TextElement, Targetable):

2477 valid_attributes: Final = Element.valid_attributes + (

2478 'anonymous', 'refid', 'refname', 'refuri')

2479

2480

2481class system_message(Special, BackLinkable, PreBibliographic, Element):

2482 """

2483 System message element.

2484

2485 Do not instantiate this class directly; use

2486 ``document.reporter.info/warning/error/severe()`` instead.

2487 """

2488 valid_attributes: Final = BackLinkable.valid_attributes + (

2489 'level', 'line', 'type')

2490 content_model: Final = ((Body, '+'),) # (%body.elements;)+

2491

2492 def __init__(self,

2493 message: str | None = None,

2494 *children,

2495 **attributes: Any,

2496 ) -> None:

2497 rawsource = attributes.pop('rawsource', '')

2498 if message:

2499 p = paragraph('', message)

2500 children = (p,) + children

2501 try:

2502 Element.__init__(self, rawsource, *children, **attributes)

2503 except: # NoQA: E722 (catchall)

2504 print('system_message: children=%r' % (children,))

2505 raise

2506

2507 def astext(self) -> str:

2508 line = self.get('line', '')

2509 return '%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],

2510 self['level'], Element.astext(self))

2511

2512

2513class pending(Invisible, Element):

2514 """

2515 Placeholder for pending operations.

2516

2517 The "pending" element is used to encapsulate a pending operation: the

2518 operation (transform), the point at which to apply it, and any data it

2519 requires. Only the pending operation's location within the document is

2520 stored in the public document tree (by the "pending" object itself); the

2521 operation and its data are stored in the "pending" object's internal

2522 instance attributes.

2523

2524 For example, say you want a table of contents in your reStructuredText

2525 document. The easiest way to specify where to put it is from within the

2526 document, with a directive::

2527

2528 .. contents::

2529

2530 But the "contents" directive can't do its work until the entire document

2531 has been parsed and possibly transformed to some extent. So the directive

2532 code leaves a placeholder behind that will trigger the second phase of its

2533 processing, something like this::

2534

2535 <pending ...public attributes...> + internal attributes

2536

2537 Use `document.note_pending()` so that the

2538 `docutils.transforms.Transformer` stage of processing can run all pending

2539 transforms.

2540 """

2541

2542 def __init__(self,

2543 transform: Transform,

2544 details: Mapping[str, Any] | None = None,

2545 rawsource: str = '',

2546 *children,

2547 **attributes: Any,

2548 ) -> None:

2549 Element.__init__(self, rawsource, *children, **attributes)

2550

2551 self.transform: Transform = transform

2552 """The `docutils.transforms.Transform` class implementing the pending

2553 operation."""

2554

2555 self.details: Mapping[str, Any] = details or {}

2556 """Detail data (dictionary) required by the pending operation."""

2557

2558 def pformat(self, indent: str = ' ', level: int = 0) -> str:

2559 internals = ['.. internal attributes:',

2560 ' .transform: %s.%s' % (self.transform.__module__,

2561 self.transform.__name__),

2562 ' .details:']

2563 details = sorted(self.details.items())

2564 for key, value in details:

2565 if isinstance(value, Node):

2566 internals.append('%7s%s:' % ('', key))

2567 internals.extend(['%9s%s' % ('', line)

2568 for line in value.pformat().splitlines()])

2569 elif (value

2570 and isinstance(value, list)

2571 and isinstance(value[0], Node)):

2572 internals.append('%7s%s:' % ('', key))

2573 for v in value:

2574 internals.extend(['%9s%s' % ('', line)

2575 for line in v.pformat().splitlines()])

2576 else:

2577 internals.append('%7s%s: %r' % ('', key, value))

2578 return (Element.pformat(self, indent, level)

2579 + ''.join((' %s%s\n' % (indent * level, line))

2580 for line in internals))

2581

2582 def copy(self) -> Self:

2583 obj = self.__class__(self.transform, self.details, self.rawsource,

2584 **self.attributes)

2585 obj._document = self._document

2586 obj.source = self.source

2587 obj.line = self.line

2588 return obj

2589

2590

2591class raw(Special, Inline, PreBibliographic,

2592 FixedTextElement, PureTextElement):

2593 """Raw data that is to be passed untouched to the Writer.

2594

2595 Can be used as Body element or Inline element.

2596 """

2597 valid_attributes: Final = Element.valid_attributes + (

2598 'format', 'xml:space')

2599

2600

2601# Inline Elements

2602# ===============

2603

2604class abbreviation(Inline, TextElement): pass

2605class acronym(Inline, TextElement): pass

2606class emphasis(Inline, TextElement): pass

2607class generated(Inline, TextElement): pass

2608class inline(Inline, TextElement): pass

2609class literal(Inline, TextElement): pass

2610class strong(Inline, TextElement): pass

2611class subscript(Inline, TextElement): pass

2612class superscript(Inline, TextElement): pass

2613class title_reference(Inline, TextElement): pass

2614

2615

2616class footnote_reference(Inline, Referential, PureTextElement):

2617 valid_attributes: Final = Element.valid_attributes + (

2618 'auto', 'refid', 'refname')

2619

2620

2621class citation_reference(Inline, Referential, PureTextElement):

2622 valid_attributes: Final = Element.valid_attributes + ('refid', 'refname')

2623

2624

2625class substitution_reference(Inline, TextElement):

2626 valid_attributes: Final = Element.valid_attributes + ('refname',)

2627

2628

2629class math(Inline, PureTextElement):

2630 """Mathematical notation in running text."""

2631

2632

2633class problematic(Inline, TextElement):

2634 valid_attributes: Final = Element.valid_attributes + (

2635 'refid', 'refname', 'refuri')

2636

2637

2638# ========================================

2639# Auxiliary Classes, Functions, and Data

2640# ========================================

2641

2642node_class_names: Sequence[str] = """

2643 Text

2644 abbreviation acronym address admonition attention attribution author

2645 authors

2646 block_quote bullet_list

2647 caption caution citation citation_reference classifier colspec comment

2648 compound contact container copyright

2649 danger date decoration definition definition_list definition_list_item

2650 description docinfo doctest_block document

2651 emphasis entry enumerated_list error

2652 field field_body field_list field_name figure footer

2653 footnote footnote_reference

2654 generated

2655 header hint

2656 image important inline

2657 label legend line line_block list_item literal literal_block

2658 math math_block meta

2659 note

2660 option option_argument option_group option_list option_list_item

2661 option_string organization

2662 paragraph pending problematic

2663 raw reference revision row rubric

2664 section sidebar status strong subscript substitution_definition

2665 substitution_reference subtitle superscript system_message

2666 table target tbody term tgroup thead tip title title_reference topic

2667 transition

2668 version

2669 warning""".split()

2670"""A list of names of all concrete Node subclasses."""

2671

2672

2673class NodeVisitor:

2674 """

2675 "Visitor" pattern [GoF95]_ abstract superclass implementation for

2676 document tree traversals.

2677

2678 Each node class has corresponding methods, doing nothing by

2679 default; override individual methods for specific and useful

2680 behaviour. The `dispatch_visit()` method is called by

2681 `Node.walk()` upon entering a node. `Node.walkabout()` also calls

2682 the `dispatch_departure()` method before exiting a node.

2683

2684 The dispatch methods call "``visit_`` + node class name" or

2685 "``depart_`` + node class name", resp.

2686

2687 This is a base class for visitors whose ``visit_...`` & ``depart_...``

2688 methods must be implemented for *all* compulsory node types encountered

2689 (such as for `docutils.writers.Writer` subclasses).

2690 Unimplemented methods will raise exceptions (except for optional nodes).

2691

2692 For sparse traversals, where only certain node types are of interest, use

2693 subclass `SparseNodeVisitor` instead. When (mostly or entirely) uniform

2694 processing is desired, subclass `GenericNodeVisitor`.

2695

2696 .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of

2697 Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,

2698 1995.

2699 """

2700

2701 optional: ClassVar[tuple[str, ...]] = ('meta',)

2702 """

2703 Tuple containing node class names (as strings).

2704

2705 No exception will be raised if writers do not implement visit

2706 or departure functions for these node classes.

2707

2708 Used to ensure transitional compatibility with existing 3rd-party writers.

2709 """

2710

2711 def __init__(self, document: document, /) -> None:

2712 self.document: document = document

2713

2714 def dispatch_visit(self, node) -> None:

2715 """

2716 Call self."``visit_`` + node class name" with `node` as

2717 parameter. If the ``visit_...`` method does not exist, call

2718 self.unknown_visit.

2719 """

2720 node_name = node.__class__.__name__

2721 method = getattr(self, 'visit_' + node_name, self.unknown_visit)

2722 self.document.reporter.debug(

2723 'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'

2724 % (method.__name__, node_name))

2725 return method(node)

2726

2727 def dispatch_departure(self, node) -> None:

2728 """

2729 Call self."``depart_`` + node class name" with `node` as

2730 parameter. If the ``depart_...`` method does not exist, call

2731 self.unknown_departure.

2732 """

2733 node_name = node.__class__.__name__

2734 method = getattr(self, 'depart_' + node_name, self.unknown_departure)

2735 self.document.reporter.debug(

2736 'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'

2737 % (method.__name__, node_name))

2738 return method(node)

2739

2740 def unknown_visit(self, node) -> None:

2741 """

2742 Called when entering unknown `Node` types.

2743

2744 Raise an exception unless overridden.

2745 """

2746 if (self.document.settings.strict_visitor

2747 or node.__class__.__name__ not in self.optional):

2748 raise NotImplementedError(

2749 '%s visiting unknown node type: %s'

2750 % (self.__class__, node.__class__.__name__))

2751

2752 def unknown_departure(self, node) -> None:

2753 """

2754 Called before exiting unknown `Node` types.

2755

2756 Raise exception unless overridden.

2757 """

2758 if (self.document.settings.strict_visitor

2759 or node.__class__.__name__ not in self.optional):

2760 raise NotImplementedError(

2761 '%s departing unknown node type: %s'

2762 % (self.__class__, node.__class__.__name__))

2763

2764

2765class SparseNodeVisitor(NodeVisitor):

2766 """

2767 Base class for sparse traversals, where only certain node types are of

2768 interest. When ``visit_...`` & ``depart_...`` methods should be

2769 implemented for *all* node types (such as for `docutils.writers.Writer`

2770 subclasses), subclass `NodeVisitor` instead.

2771 """

2772

2773

2774class GenericNodeVisitor(NodeVisitor):

2775 """

2776 Generic "Visitor" abstract superclass, for simple traversals.

2777

2778 Unless overridden, each ``visit_...`` method calls `default_visit()`, and

2779 each ``depart_...`` method (when using `Node.walkabout()`) calls

2780 `default_departure()`. `default_visit()` (and `default_departure()`) must

2781 be overridden in subclasses.

2782

2783 Define fully generic visitors by overriding `default_visit()` (and

2784 `default_departure()`) only. Define semi-generic visitors by overriding

2785 individual ``visit_...()`` (and ``depart_...()``) methods also.

2786

2787 `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should

2788 be overridden for default behavior.

2789 """

2790

2791 def default_visit(self, node):

2792 """Override for generic, uniform traversals."""

2793 raise NotImplementedError

2794

2795 def default_departure(self, node):

2796 """Override for generic, uniform traversals."""

2797 raise NotImplementedError

2798

2799

2800def _call_default_visit(self: GenericNodeVisitor, node) -> None:

2801 self.default_visit(node)

2802

2803

2804def _call_default_departure(self: GenericNodeVisitor, node) -> None:

2805 self.default_departure(node)

2806

2807

2808def _nop(self: SparseNodeVisitor, node) -> None:

2809 pass

2810

2811

2812def _add_node_class_names(names) -> None:

2813 """Save typing with dynamic assignments:"""

2814 for _name in names:

2815 setattr(GenericNodeVisitor, "visit_" + _name, _call_default_visit)

2816 setattr(GenericNodeVisitor, "depart_" + _name, _call_default_departure)

2817 setattr(SparseNodeVisitor, 'visit_' + _name, _nop)

2818 setattr(SparseNodeVisitor, 'depart_' + _name, _nop)

2819

2820

2821_add_node_class_names(node_class_names)

2822

2823

2824class TreeCopyVisitor(GenericNodeVisitor):

2825 """

2826 Make a complete copy of a tree or branch, including element attributes.

2827 """

2828

2829 def __init__(self, document: document) -> None:

2830 super().__init__(document)

2831 self.parent_stack: list[list] = []

2832 self.parent: list = []

2833

2834 def get_tree_copy(self):

2835 return self.parent[0]

2836

2837 def default_visit(self, node) -> None:

2838 """Copy the current node, and make it the new acting parent."""

2839 newnode = node.copy()

2840 self.parent.append(newnode)

2841 self.parent_stack.append(self.parent)

2842 self.parent = newnode

2843

2844 def default_departure(self, node) -> None:

2845 """Restore the previous acting parent."""

2846 self.parent = self.parent_stack.pop()

2847

2848

2849# Custom Exceptions

2850# =================

2851

2852class ValidationError(ValueError):

2853 """Invalid Docutils Document Tree Element."""

2854 def __init__(self, msg: str, problematic_element: Element = None) -> None:

2855 super().__init__(msg)

2856 self.problematic_element = problematic_element

2857

2858

2859class TreePruningException(Exception):

2860 """

2861 Base class for `NodeVisitor`-related tree pruning exceptions.

2862

2863 Raise subclasses from within ``visit_...`` or ``depart_...`` methods

2864 called from `Node.walk()` and `Node.walkabout()` tree traversals to prune

2865 the tree traversed.

2866 """

2867

2868

2869class SkipChildren(TreePruningException):

2870 """

2871 Do not visit any children of the current node. The current node's

2872 siblings and ``depart_...`` method are not affected.

2873 """

2874

2875

2876class SkipSiblings(TreePruningException):

2877 """

2878 Do not visit any more siblings (to the right) of the current node. The

2879 current node's children and its ``depart_...`` method are not affected.

2880 """

2881

2882

2883class SkipNode(TreePruningException):

2884 """

2885 Do not visit the current node's children, and do not call the current

2886 node's ``depart_...`` method.

2887 """

2888

2889

2890class SkipDeparture(TreePruningException):

2891 """

2892 Do not call the current node's ``depart_...`` method. The current node's

2893 children and siblings are not affected.

2894 """

2895

2896

2897class NodeFound(TreePruningException):

2898 """

2899 Raise to indicate that the target of a search has been found. This

2900 exception must be caught by the client; it is not caught by the traversal

2901 code.

2902 """

2903

2904

2905class StopTraversal(TreePruningException):

2906 """

2907 Stop the traversal altogether. The current node's ``depart_...`` method

2908 is not affected. The parent nodes ``depart_...`` methods are also called

2909 as usual. No other nodes are visited. This is an alternative to

2910 NodeFound that does not cause exception handling to trickle up to the

2911 caller.

2912 """

2913

2914

2915# definition moved here from `utils` to avoid circular import dependency

2916def unescape(text: str,

2917 restore_backslashes: bool = False,

2918 respect_whitespace: bool = False,

2919 ) -> str:

2920 """

2921 Return a string with nulls removed or restored to backslashes.

2922 Backslash-escaped spaces are also removed.

2923 """

2924 # `respect_whitespace` is ignored (since introduction 2016-12-16)

2925 if restore_backslashes:

2926 return text.replace('\x00', '\\')

2927 else:

2928 for sep in ['\x00 ', '\x00\n', '\x00']:

2929 text = ''.join(text.split(sep))

2930 return text

2931

2932

2933def make_id(string: str) -> str:

2934 """

2935 Convert `string` into an identifier and return it.

2936

2937 Docutils identifiers will conform to the regular expression

2938 ``[a-z](-?[a-z0-9]+)*``. For CSS compatibility, identifiers (the "class"

2939 and "id" attributes) should have no underscores, colons, or periods.

2940 Hyphens may be used.

2941

2942 - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

2943

2944 ID and NAME tokens must begin with a letter ([A-Za-z]) and may be

2945 followed by any number of letters, digits ([0-9]), hyphens ("-"),

2946 underscores ("_"), colons (":"), and periods (".").

2947

2948 - However the `CSS1 spec`_ defines identifiers based on the "name" token,

2949 a tighter interpretation ("flex" tokenizer notation; "latin1" and

2950 "escape" 8-bit characters have been replaced with entities)::

2951

2952 unicode \\[0-9a-f]{1,4}

2953 latin1 [¡-ÿ]

2954 escape {unicode}|\\[ -~¡-ÿ]

2955 nmchar [-a-z0-9]|{latin1}|{escape}

2956 name {nmchar}+

2957

2958 The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),

2959 or periods ("."), therefore "class" and "id" attributes should not contain

2960 these characters. They should be replaced with hyphens ("-"). Combined

2961 with HTML's requirements (the first character must be a letter; no

2962 "unicode", "latin1", or "escape" characters), this results in the

2963 ``[a-z](-?[a-z0-9]+)*`` pattern.

2964

2965 .. _HTML 4.01 spec: https://www.w3.org/TR/html401

2966 .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1

2967 """

2968 id = string.lower()

2969 id = id.translate(_non_id_translate_digraphs)

2970 id = id.translate(_non_id_translate)

2971 # get rid of non-ascii characters.

2972 # 'ascii' lowercase to prevent problems with turkish locale.

2973 id = unicodedata.normalize(

2974 'NFKD', id).encode('ascii', 'ignore').decode('ascii')

2975 # shrink runs of whitespace and replace by hyphen

2976 id = _non_id_chars.sub('-', ' '.join(id.split()))

2977 id = _non_id_at_ends.sub('', id)

2978 return str(id)

2979

2980

2981_non_id_chars: re.Pattern[str] = re.compile('[^a-z0-9]+')

2982_non_id_at_ends: re.Pattern[str] = re.compile('^[-0-9]+|-+$')

2983_non_id_translate: dict[int, str] = {

2984 0x00f8: 'o', # o with stroke

2985 0x0111: 'd', # d with stroke

2986 0x0127: 'h', # h with stroke

2987 0x0131: 'i', # dotless i

2988 0x0142: 'l', # l with stroke

2989 0x0167: 't', # t with stroke

2990 0x0180: 'b', # b with stroke

2991 0x0183: 'b', # b with topbar

2992 0x0188: 'c', # c with hook

2993 0x018c: 'd', # d with topbar

2994 0x0192: 'f', # f with hook

2995 0x0199: 'k', # k with hook

2996 0x019a: 'l', # l with bar

2997 0x019e: 'n', # n with long right leg

2998 0x01a5: 'p', # p with hook

2999 0x01ab: 't', # t with palatal hook

3000 0x01ad: 't', # t with hook

3001 0x01b4: 'y', # y with hook

3002 0x01b6: 'z', # z with stroke

3003 0x01e5: 'g', # g with stroke

3004 0x0225: 'z', # z with hook

3005 0x0234: 'l', # l with curl

3006 0x0235: 'n', # n with curl

3007 0x0236: 't', # t with curl

3008 0x0237: 'j', # dotless j

3009 0x023c: 'c', # c with stroke

3010 0x023f: 's', # s with swash tail

3011 0x0240: 'z', # z with swash tail

3012 0x0247: 'e', # e with stroke

3013 0x0249: 'j', # j with stroke

3014 0x024b: 'q', # q with hook tail

3015 0x024d: 'r', # r with stroke

3016 0x024f: 'y', # y with stroke

3017}

3018_non_id_translate_digraphs: dict[int, str] = {

3019 0x00df: 'sz', # ligature sz

3020 0x00e6: 'ae', # ae

3021 0x0153: 'oe', # ligature oe

3022 0x0238: 'db', # db digraph

3023 0x0239: 'qp', # qp digraph

3024}

3025

3026

3027def dupname(node: Element, name: str) -> None:

3028 node['dupnames'].append(name)

3029 node['names'].remove(name)

3030 # Assume that `node` is referenced, even though it isn't;

3031 # we don't want to throw unnecessary system_messages.

3032 node.referenced = True

3033

3034

3035def fully_normalize_name(name: str) -> str:

3036 """Return a case- and whitespace-normalized name."""

3037 return ' '.join(name.lower().split())

3038

3039

3040def whitespace_normalize_name(name: str) -> str:

3041 """Return a whitespace-normalized name."""

3042 return ' '.join(name.split())

3043

3044

3045def serial_escape(value: str) -> str:

3046 """Escape string values that are elements of a list, for serialization."""

3047 return value.replace('\\', r'\\').replace(' ', r'\ ')

3048

3049

3050def split_name_list(s: str) -> list[str]:

3051 r"""Split a string at non-escaped whitespace.

3052

3053 Backslashes escape internal whitespace (cf. `serial_escape()`).

3054 Return list of "names" (after removing escaping backslashes).

3055

3056 >>> split_name_list(r'a\ n\ame two\\ n\\ames'),

3057 ['a name', 'two\\', r'n\ames']

3058

3059 Provisional.

3060 """

3061 s = s.replace('\\', '\x00') # escape with NULL char

3062 s = s.replace('\x00\x00', '\\') # unescape backslashes

3063 s = s.replace('\x00 ', '\x00\x00') # escaped spaces -> NULL NULL

3064 names = s.split(' ')

3065 # restore internal spaces, drop other escaping characters

3066 return [name.replace('\x00\x00', ' ').replace('\x00', '')

3067 for name in names]

3068

3069

3070def pseudo_quoteattr(value: str) -> str:

3071 """Quote attributes for pseudo-xml"""

3072 return '"%s"' % value

3073

3074

3075def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'

3076 ) -> tuple[int|float, str]:

3077 """Parse a measure__, return value + unit.

3078

3079 `unit_pattern` is a regular expression describing recognized units.

3080 The default is suited for (but not limited to) CSS3 units and SI units.

3081 It matches runs of ASCII letters or Greek mu, a single percent sign,

3082 or no unit.

3083

3084 __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure

3085

3086 Provisional.

3087 """

3088 match = re.fullmatch(f'(-?[0-9.]+) *({unit_pattern})', measure)

3089 try:

3090 try:

3091 value = int(match.group(1))

3092 except ValueError:

3093 value = float(match.group(1))

3094 unit = match.group(2)

3095 except (AttributeError, ValueError):

3096 raise ValueError(f'"{measure}" is no valid measure.')

3097 return value, unit

3098

3099

3100# Methods to validate `Element attribute`__ values.

3101

3102# Ensure the expected Python `data type`__, normalize, and check for

3103# restrictions.

3104#

3105# The methods can be used to convert `str` values (eg. from an XML

3106# representation) or to validate an existing document tree or node.

3107#

3108# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,

3109# and the `attribute_validating_functions` mapping below.

3110#

3111# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference

3112# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types

3113

3114def create_keyword_validator(*keywords: str) -> Callable[[str], str]:

3115 """

3116 Return a function that validates a `str` against given `keywords`.

3117

3118 Provisional.

3119 """

3120 def validate_keywords(value: str) -> str:

3121 if value not in keywords:

3122 allowed = '", \"'.join(keywords)

3123 raise ValueError(f'"{value}" is not one of "{allowed}".')

3124 return value

3125 return validate_keywords

3126

3127

3128def validate_identifier(value: str) -> str:

3129 """

3130 Validate identifier key or class name.

3131

3132 Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.

3133

3134 __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type

3135

3136 Provisional.

3137 """

3138 if value != make_id(value):

3139 raise ValueError(f'"{value}" is no valid id or class name.')

3140 return value

3141

3142

3143def validate_identifier_list(value: str | list[str]) -> list[str]:

3144 """

3145 A (space-separated) list of ids or class names.

3146

3147 `value` may be a `list` or a `str` with space separated

3148 ids or class names (cf. `validate_identifier()`).

3149

3150 Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.

3151

3152 __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type

3153 __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type

3154 __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type

3155

3156 Provisional.

3157 """

3158 if isinstance(value, str):

3159 value = value.split()

3160 for token in value:

3161 validate_identifier(token)

3162 return value

3163

3164

3165def validate_measure(measure: str) -> str:

3166 """

3167 Validate a measure__ (number + optional unit). Return normalized `str`.

3168

3169 See `parse_measure()` for a function returning a "number + unit" tuple.

3170

3171 The unit may be a run of ASCII letters or Greek mu, a single percent sign,

3172 or the empty string. Case is preserved.

3173

3174 Provisional.

3175

3176 __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure

3177 """

3178 value, unit = parse_measure(measure)

3179 return f'{value}{unit}'

3180

3181

3182def validate_colwidth(measure: str|int|float) -> int|float:

3183 """Validate the "colwidth__" attribute.

3184

3185 Provisional:

3186 `measure` must be a `str` and will be returned as normalized `str`

3187 (with unit "*" for proportional values) in Docutils 1.0.

3188

3189 The default unit will change to "pt" in Docutils 2.0.

3190

3191 __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth

3192 """

3193 if isinstance(measure, (int, float)):

3194 value = measure

3195 elif measure in ('*', ''): # short for '1*'

3196 value = 1

3197 else:

3198 try:

3199 value, _unit = parse_measure(measure, unit_pattern='[*]?')

3200 except ValueError:

3201 value = -1

3202 if value <= 0:

3203 raise ValueError(f'"{measure}" is no proportional measure.')

3204 return value

3205

3206

3207def validate_NMTOKEN(value: str) -> str:

3208 """

3209 Validate a "name token": a `str` of ASCII letters, digits, and [-._].

3210

3211 Provisional.

3212 """

3213 if not re.fullmatch('[-._A-Za-z0-9]+', value):

3214 raise ValueError(f'"{value}" is no NMTOKEN.')

3215 return value

3216

3217

3218def validate_NMTOKENS(value: str | list[str]) -> list[str]:

3219 """

3220 Validate a list of "name tokens".

3221

3222 Provisional.

3223 """

3224 if isinstance(value, str):

3225 value = value.split()

3226 for token in value:

3227 validate_NMTOKEN(token)

3228 return value

3229

3230

3231def validate_refname_list(value: str | list[str]) -> list[str]:

3232 """

3233 Validate a list of `reference names`__.

3234

3235 Reference names may contain all characters;

3236 whitespace is normalized (cf, `whitespace_normalize_name()`).

3237

3238 `value` may be either a `list` of names or a `str` with

3239 space separated names (with internal spaces backslash escaped

3240 and literal backslashes doubled cf. `serial_escape()`).

3241

3242 Return a list of whitespace-normalized, unescaped reference names.

3243

3244 Provisional.

3245

3246 __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name

3247 """

3248 if isinstance(value, str):

3249 value = split_name_list(value)

3250 return [whitespace_normalize_name(name) for name in value]

3251

3252

3253def validate_yesorno(value: str | int | bool) -> bool:

3254 """Validate a `%yesorno`__ (flag) value.

3255

3256 The string literal "0" evaluates to ``False``, all other

3257 values are converterd with `bool()`.

3258

3259 __ https://docutils.sourceforge.io/docs/ref/doctree.html#yesorno

3260 """

3261 if value == "0":

3262 return False

3263 return bool(value)

3264

3265

3266ATTRIBUTE_VALIDATORS: dict[str, Callable[[str], Any]] = {

3267 'alt': str, # CDATA

3268 'align': str,

3269 'anonymous': validate_yesorno,

3270 'auto': str, # CDATA (only '1' or '*' are used in rST)

3271 'backrefs': validate_identifier_list,

3272 'bullet': str, # CDATA (only '-', '+', or '*' are used in rST)

3273 'classes': validate_identifier_list,

3274 'char': str, # from Exchange Table Model (CALS), currently ignored

3275 'charoff': validate_NMTOKEN, # from CALS, currently ignored

3276 'colname': validate_NMTOKEN, # from CALS, currently ignored

3277 'colnum': int, # from CALS, currently ignored

3278 'cols': int, # from CALS: "NMTOKEN, […] must be an integer > 0".

3279 'colsep': validate_yesorno,

3280 'colwidth': validate_colwidth, # see docstring for pending changes

3281 'content': str, # <meta>

3282 'delimiter': str,

3283 'dir': create_keyword_validator('ltr', 'rtl', 'auto'), # <meta>

3284 'dupnames': validate_refname_list,

3285 'enumtype': create_keyword_validator('arabic', 'loweralpha', 'lowerroman',

3286 'upperalpha', 'upperroman'),

3287 'format': str, # CDATA (space separated format names)

3288 'frame': create_keyword_validator('top', 'bottom', 'topbot', 'all',

3289 'sides', 'none'), # from CALS, ignored

3290 'height': validate_measure,

3291 'http-equiv': str, # <meta>

3292 'ids': validate_identifier_list,

3293 'lang': str, # <meta>

3294 'level': int,

3295 'line': int,

3296 'ltrim': validate_yesorno,

3297 'loading': create_keyword_validator('embed', 'link', 'lazy'),

3298 'media': str, # <meta>

3299 'morecols': int,

3300 'morerows': int,

3301 'name': whitespace_normalize_name, # in <reference> (deprecated)

3302 # 'name': node_attributes.validate_NMTOKEN, # in <meta>

3303 'names': validate_refname_list,

3304 'namest': validate_NMTOKEN, # start of span, from CALS, currently ignored

3305 'nameend': validate_NMTOKEN, # end of span, from CALS, currently ignored

3306 'pgwide': validate_yesorno, # from CALS, currently ignored

3307 'prefix': str,

3308 'refid': validate_identifier,

3309 'refname': whitespace_normalize_name,

3310 'refuri': str,

3311 'rowsep': validate_yesorno,

3312 'rtrim': validate_yesorno,

3313 'scale': int,

3314 'scheme': str,

3315 'source': str,

3316 'start': int,

3317 'stub': validate_yesorno,

3318 'suffix': str,

3319 'title': str,

3320 'type': validate_NMTOKEN,

3321 'uri': str,

3322 'valign': create_keyword_validator('top', 'middle', 'bottom'), # from CALS

3323 'width': validate_measure,

3324 'xml:space': create_keyword_validator('default', 'preserve'),

3325 }

3326"""

3327Mapping of `attribute names`__ to validating functions.

3328

3329Provisional.

3330

3331__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference

3332"""