Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/nodes.py: 62%

1# $Id$

2# Author: David Goodger <goodger@python.org>

3# Maintainer: docutils-develop@lists.sourceforge.net

4# Copyright: This module has been placed in the public domain.

6"""

7Docutils document tree element class library.

9The relationships and semantics of elements and attributes are documented in

10`The Docutils Document Tree`__.

12Classes in CamelCase are abstract base classes or auxiliary classes. The one

13exception is `Text`, for a text (PCDATA) node; uppercase is used to

14differentiate from element classes. Classes in lower_case_with_underscores

15are element classes, matching the XML element generic identifiers in the DTD_.

17The position of each node (the level at which it can occur) is significant and

18is represented by abstract base classes (`Root`, `Structural`, `Body`,

19`Inline`, etc.). Certain transformations will be easier because we can use

20``isinstance(node, base_class)`` to determine the position of the node in the

21hierarchy.

23__ https://docutils.sourceforge.io/docs/ref/doctree.html

24.. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd

25"""

27from __future__ import annotations

29__docformat__ = 'reStructuredText'

31import os

32import re

33import sys

34import unicodedata

35import warnings

36from collections import Counter

37# import xml.dom.minidom as dom # -> conditional import in Node.asdom()

38# and document.asdom()

40# import docutils.transforms # -> delayed import in document.__init__()

# `TYPE_CHECKING` is a plain module constant set to False, so nothing in the
# ``if`` block below is executed at runtime: the imports and aliases are only
# ever referenced from (lazily evaluated) annotations.
# NOTE(review): presumably static type checkers special-case the name
# ``TYPE_CHECKING`` and analyse the block as if it were live -- confirm
# against the type checker's documentation.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from collections.abc import (Callable, Iterable, Iterator,
                                 Mapping, Sequence)
    from types import ModuleType
    from typing import Any, ClassVar, Final, Literal, Self, SupportsIndex

    from docutils.utils._typing import TypeAlias

    from xml.dom import minidom

    from docutils.frontend import Values
    from docutils.transforms import Transformer, Transform
    from docutils.utils import Reporter

    # Aliases describing the shape of `Element.content_model`.
    _ContentModelCategory: TypeAlias = tuple['Element' | tuple['Element', ...]]
    _ContentModelQuantifier = Literal['.', '?', '+', '*']
    _ContentModelItem: TypeAlias = tuple[_ContentModelCategory,
                                         _ContentModelQuantifier]
    _ContentModelTuple: TypeAlias = tuple[_ContentModelItem, ...]

    StrPath: TypeAlias = str | os.PathLike[str]
    """File system path. No bytes!"""

    # Signature of the attribute update functions: (attr, value, replace).
    _UpdateFun: TypeAlias = Callable[[str, Any, bool], None]

69# ==============================

70# Functional Node Base Classes

71# ==============================

class Node:
    """Abstract base class of nodes in a document tree."""

    parent: Element | None = None
    """Back-reference to the Node immediately containing this Node."""

    children: Sequence  # defined in subclasses
    """List of child nodes (Elements or Text).

    Override in subclass instances that are not terminal nodes.
    """

    source: StrPath | None = None
    """Path or description of the input source which generated this Node."""

    line: int | None = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    tagname: str  # defined in subclasses
    """The element generic identifier."""

    _document: document | None = None

    @property
    def document(self) -> document | None:
        """Return the `document` root node of the tree containing this Node.

        Falls back to the parent's `document`; return None if neither
        this node nor any ancestor has one.
        """
        try:
            return self._document or self.parent.document
        except AttributeError:
            # `self.parent` is None: the chain of parents is exhausted.
            return None

    @document.setter
    def document(self, value: document) -> None:
        self._document = value

    def __bool__(self) -> Literal[True]:
        """
        Node instances are always true, even if they're empty.  A node is more
        than a simple container.  Its boolean "truth" does not depend on
        having one or more subnodes in the doctree.

        Use `len()` to check node length.
        """
        return True

    def asdom(self,
              dom: ModuleType | None = None,
              ) -> minidom.Document | minidom.Element | minidom.Text:
        # TODO: minidom.Document is only returned by document.asdom()
        #       (which overwrites this base-class implementation)
        """Return a DOM **fragment** representation of this Node."""
        if dom is None:
            import xml.dom.minidom as dom
        domroot = dom.Document()
        return self._dom_node(domroot)

    def pformat(self, indent: str = '    ', level: int = 0) -> str:
        """
        Return an indented pseudo-XML representation, for test purposes.

        Override in subclasses.
        """
        raise NotImplementedError

    def copy(self) -> Self:
        """Return a copy of self."""
        raise NotImplementedError

    def deepcopy(self) -> Self:
        """Return a deep copy of self (also copying children)."""
        raise NotImplementedError

    def astext(self) -> str:
        """Return a string representation of this Node."""
        raise NotImplementedError

    def setup_child(self, child) -> None:
        """Register `self` as parent of `child`.

        If `self` belongs to a document, also pass the document on to
        `child` and fill in the child's missing `source` and `line`
        from the document's current source and line.
        """
        child.parent = self
        # The `document` property may walk up the whole chain of parents;
        # look it up once instead of for every use below.
        doc = self.document
        if doc:
            child.document = doc
            if child.source is None:
                child.source = doc.current_source
            if child.line is None:
                child.line = doc.current_line

    def walk(self, visitor: NodeVisitor) -> bool:
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (The `walkabout()` method is similar, except it also
        calls the `dispatch_departure()` method before exiting each
        node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit`` methods (and ``depart`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        stop = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return stop
            except SkipDeparture:  # not applicable; ignore
                pass
            children = self.children
            try:
                # iterate over a copy: visitors may modify the tree
                for child in children[:]:
                    if child.walk(visitor):
                        stop = True
                        break
            except SkipSiblings:
                pass
        except StopTraversal:
            stop = True
        return stop

    def walkabout(self, visitor: NodeVisitor) -> bool:
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        call_depart = True
        stop = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                return stop
            except SkipDeparture:
                call_depart = False
            children = self.children
            try:
                # iterate over a copy: visitors may modify the tree
                for child in children[:]:
                    if child.walkabout(visitor):
                        stop = True
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            stop = True
        if call_depart:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return stop

    def _fast_findall(self, cls: type) -> Iterator:
        """Return iterator that only supports instance checks."""
        if isinstance(self, cls):
            yield self
        for child in self.children:
            yield from child._fast_findall(cls)

    def _superfast_findall(self) -> Iterator:
        """Return iterator that doesn't check for a condition."""
        # This is different from ``iter(self)`` implemented via
        # __getitem__() and __len__() in the Element subclass,
        # which yields only the direct children.
        yield self
        for child in self.children:
            yield from child._superfast_findall()

    def findall(self,
                condition: type | Callable[[Node], bool] | None = None,
                include_self: bool = True,
                descend: bool = True,
                siblings: bool = False,
                ascend: bool = False,
                ) -> Iterator:
        """
        Return an iterator yielding nodes following `self`:

        * self (if `include_self` is true)
        * all descendants in tree traversal order (if `descend` is true)
        * the following siblings (if `siblings` is true) and their
          descendants (if also `descend` is true)
        * the following siblings of the parent (if `ascend` is true) and
          their descendants (if also `descend` is true), and so on.

        If `condition` is not None, the iterator yields only nodes
        for which ``condition(node)`` is true.  If `condition` is a
        type ``cls``, it is equivalent to a function consisting
        of ``return isinstance(node, cls)``.

        If `ascend` is true, assume `siblings` to be true as well.

        If the tree structure is modified during iteration, the result
        is undefined.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.findall() and
                    <strong>    <--- strong.findall() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then tuple(emphasis.findall()) equals ::

            (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

        and list(strong.findall(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        # Check for special argument combinations that allow using an
        # optimized version of findall()
        if include_self and descend and not siblings:
            if condition is None:
                yield from self._superfast_findall()
                return
            elif isinstance(condition, type):
                yield from self._fast_findall(condition)
                return
        # Check if `condition` is a class (check for TypeType for Python
        # implementations that use only new-style classes, like PyPy).
        if isinstance(condition, type):
            node_class = condition

            def condition(node, node_class=node_class):
                return isinstance(node, node_class)

        if include_self and (condition is None or condition(self)):
            yield self
        if descend and len(self.children):
            for child in self:
                yield from child.findall(condition=condition,
                                         include_self=True, descend=True,
                                         siblings=False, ascend=False)
        if siblings or ascend:
            node = self
            while node.parent:
                index = node.parent.index(node)
                # extra check since Text nodes have value-equality
                while node.parent[index] is not node:
                    index = node.parent.index(node, index + 1)
                for sibling in node.parent[index+1:]:
                    yield from sibling.findall(
                        condition=condition,
                        include_self=True, descend=descend,
                        siblings=False, ascend=False)
                if not ascend:
                    break
                else:
                    node = node.parent

    def traverse(self,
                 condition: type | Callable[[Node], bool] | None = None,
                 include_self: bool = True,
                 descend: bool = True,
                 siblings: bool = False,
                 ascend: bool = False,
                 ) -> list:
        """Return list of nodes following `self`.

        For looping, Node.findall() is faster and more memory efficient.
        """
        # traverse() may be eventually removed:
        warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',
                      DeprecationWarning, stacklevel=2)
        return list(self.findall(condition, include_self, descend,
                                 siblings, ascend))

    def next_node(self,
                  condition: type | Callable[[Node], bool] | None = None,
                  include_self: bool = False,
                  descend: bool = True,
                  siblings: bool = False,
                  ascend: bool = False,
                  ) -> Node | None:
        """
        Return the first node in the iterator returned by findall(),
        or None if the iterable is empty.

        Parameter list is the same as of `findall()`.  Note that `include_self`
        defaults to False, though.
        """
        try:
            return next(self.findall(condition, include_self,
                                     descend, siblings, ascend))
        except StopIteration:
            return None

    def validate(self, recursive: bool = True) -> None:
        """Raise ValidationError if this node is not valid.

        Override in subclasses that define validity constraints.
        """

    def validate_position(self) -> None:
        """Hook for additional checks of the parent's content model.

        Raise ValidationError, if `self` is at an invalid position.

        Override in subclasses with complex validity constraints. See
        `subtitle.validate_position()` and `transition.validate_position()`.
        """

403

404

class Text(Node, str):  # NoQA: SLOT000 (Node doesn't define __slots__)
    """
    Terminal node (leaf) of the document tree that holds text only.

    A `Text` instance has neither child nodes nor attributes.
    It is created from a string.

    ``str(<instance>)`` gives the raw (null-escaped) text,
    ``<instance>.astext()`` the unescaped text.
    """

    tagname: Final = '#text'

    children: Final = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data: str, rawsource: None = None) -> Self:
        """Create the node from `data`; reject bytes.

        Emit a DeprecationWarning if the ignored `rawsource` argument
        is passed.
        """
        if isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        if rawsource is not None:
            warnings.warn('nodes.Text: initialization argument "rawsource" '
                          'is ignored and will be removed in Docutils 2.0.',
                          DeprecationWarning, stacklevel=2)
        return str.__new__(cls, data)

    def shortrepr(self, maxlen: int = 18) -> str:
        """Return a representation with the text cut to `maxlen` characters."""
        text = str(self)
        if len(text) > maxlen:
            text = text[:maxlen-4] + ' ...'
        return '<%s: %r>' % (self.tagname, text)

    def __repr__(self) -> str:
        return self.shortrepr(maxlen=68)

    def astext(self) -> str:
        """Return the unescaped text as a plain `str`."""
        unescaped = unescape(self)
        return str(unescaped)

    def _dom_node(self, domroot: minidom.Document) -> minidom.Text:
        raw = str(self)
        return domroot.createTextNode(raw)

    def copy(self) -> Self:
        """Return a new instance of the same class with the same raw text."""
        cls = self.__class__
        return cls(str(self))

    def deepcopy(self) -> Self:
        """Same as `copy()`: there are no children to copy."""
        return self.copy()

    def pformat(self, indent: str = '    ', level: int = 0) -> str:
        """Return the text as indented lines (pseudo-XML).

        With the document setting "detailed", every line is shown as its
        `repr()` below a ``<#text>`` tag.
        """
        try:
            if self.document.settings.detailed:
                header = indent*level + '<#text>'
                rows = [indent*(level+1) + repr(row)
                        for row in self.splitlines(True)]
                return '\n'.join([header, *rows]) + '\n'
        except AttributeError:
            # no document or no "detailed" setting: use the plain format
            pass
        prefix = indent * level
        rows = [prefix + row for row in self.astext().splitlines()]
        return '\n'.join(rows) + '\n' if rows else ''

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString.

    def rstrip(self, chars: str | None = None) -> Self:
        stripped = str.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars: str | None = None) -> Self:
        stripped = str.lstrip(self, chars)
        return self.__class__(stripped)

476

477

class Element(Node):
    """
    `Element` is the superclass to all specific elements.

    Elements contain attributes and child nodes.
    They can be described as a cross between a list and a dictionary.

    Elements emulate dictionaries for external [#]_ attributes, indexing by
    attribute name (a string). To set the attribute 'att' to 'value', do::

        element['att'] = 'value'

    .. [#] External attributes correspond to the XML element attributes.
       From its `Node` superclass, Element also inherits "internal"
       class attributes that are accessed using the standard syntax, e.g.
       ``element.parent``.

    There are two special attributes: 'ids' and 'names'.  Both are
    lists of unique identifiers: 'ids' conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and
    details). 'names' serve as user-friendly interfaces to IDs; they are
    case- and whitespace-normalized (see the fully_normalize_name() function).

    Elements emulate lists for child nodes (element nodes and/or text
    nodes), indexing by integer.  To get the first child node, use::

        element[0]

    To iterate over the child nodes (without descending), use::

        for child in element:
            ...

    Elements may be constructed using the ``+=`` operator.  To add one new
    child node to element, do::

        element += node

    This is equivalent to ``element.append(node)``.

    To add a list of multiple child nodes at once, use the same ``+=``
    operator::

        element += [node1, node2]

    This is equivalent to ``element.extend([node1, node2])``.
    """

    list_attributes: Final = ('ids', 'classes', 'names', 'dupnames')
    """Tuple of attributes that are initialized to empty lists.

    NOTE: Derived classes should update this value when supporting
          additional list attributes.
    """

    valid_attributes: Final = list_attributes + ('source',)
    """Tuple of attributes that are valid for elements of this class.

    NOTE: Derived classes should update this value when supporting
          additional attributes.
    """

    common_attributes: Final = valid_attributes
    """Tuple of `common attributes`__  known to all Doctree Element classes.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#common-attributes
    """

    known_attributes: Final = common_attributes
    """Alias for `common_attributes`. Will be removed in Docutils 2.0."""

    basic_attributes: Final = list_attributes
    """Common list attributes. Deprecated. Will be removed in Docutils 2.0."""

    local_attributes: Final = ('backrefs',)
    """Obsolete. Will be removed in Docutils 2.0."""

    content_model: ClassVar[_ContentModelTuple] = ()
    """Python representation of the element's content model (cf. docutils.dtd).

    A tuple of ``(category, quantifier)`` tuples with

    :category: class or tuple of classes that are expected at this place(s)
               in the list of children
    :quantifier: string representation stating how many elements
                 of `category` are expected. Value is one of:
                 '.' (exactly one), '?' (zero or one),
                 '+' (one or more), '*' (zero or more).

    NOTE: The default describes the empty element. Derived classes should
    update this value to match their content model.

    Provisional.
    """

    tagname: str | None = None
    """The element generic identifier.

    If None, it is set as an instance attribute to the name of the class.
    """

    child_text_separator: Final = '\n\n'
    """Separator for child nodes, used by `astext()` method."""

581

582 def __init__(self,

583 rawsource: str = '',

584 *children,

585 **attributes: Any,

586 ) -> None:

587 self.rawsource = rawsource

588 """The raw text from which this element was constructed.

589

590 For informative and debugging purposes. Don't rely on its value!

591

592 NOTE: some elements do not set this value (default '').

593 """

594 if isinstance(rawsource, Element):

595 raise TypeError('First argument "rawsource" must be a string.')

596

597 self.children: list = []

598 """List of child nodes (elements and/or `Text`)."""

599

600 self.extend(children) # maintain parent info

601

602 self.attributes: dict[str, Any] = {}

603 """Dictionary of attribute {name: value}."""

604

605 # Initialize list attributes.

606 for att in self.list_attributes:

607 self.attributes[att] = []

608

609 for att, value in attributes.items():

610 att = att.lower() # normalize attribute name

611 if att in self.list_attributes:

612 # lists are mutable; make a copy for this node

613 self.attributes[att] = value[:]

614 else:

615 self.attributes[att] = value

616

617 if self.tagname is None:

618 self.tagname: str = self.__class__.__name__

619

620 def _dom_node(self, domroot: minidom.Document) -> minidom.Element:

621 element = domroot.createElement(self.tagname)

622 for attribute, value in self.attlist():

623 if isinstance(value, list):

624 value = ' '.join(serial_escape('%s' % (v,)) for v in value)

625 element.setAttribute(attribute, '%s' % value)

626 for child in self.children:

627 element.appendChild(child._dom_node(domroot))

628 return element

629

630 def __repr__(self) -> str:

631 data = ''

632 for c in self.children:

633 data += c.shortrepr()

634 if len(data) > 60:

635 data = data[:56] + ' ...'

636 break

637 if self['names']:

638 return '<%s "%s": %s>' % (self.tagname,

639 '; '.join(self['names']), data)

640 else:

641 return '<%s: %s>' % (self.tagname, data)

642

643 def shortrepr(self) -> str:

644 if self['names']:

645 return '<%s "%s"...>' % (self.tagname, '; '.join(self['names']))

646 else:

647 return '<%s...>' % self.tagname

648

649 def __str__(self) -> str:

650 if self.children:

651 return '%s%s%s' % (self.starttag(),

652 ''.join(str(c) for c in self.children),

653 self.endtag())

654 else:

655 return self.emptytag()

656

657 def starttag(self, quoteattr: Callable[[str], str] | None = None) -> str:

658 # the optional arg is used by the docutils_xml writer

659 if quoteattr is None:

660 quoteattr = pseudo_quoteattr

661 parts = [self.tagname]

662 for name, value in self.attlist():

663 if value is None: # boolean attribute

664 parts.append('%s="True"' % name)

665 continue

666 if isinstance(value, bool):

667 value = str(int(value))

668 if isinstance(value, list):

669 values = [serial_escape('%s' % (v,)) for v in value]

670 value = ' '.join(values)

671 else:

672 value = str(value)

673 value = quoteattr(value)

674 parts.append('%s=%s' % (name, value))

675 return '<%s>' % ' '.join(parts)

676

677 def endtag(self) -> str:

678 return '</%s>' % self.tagname

679

680 def emptytag(self) -> str:

681 attributes = ('%s="%s"' % (n, v) for n, v in self.attlist())

682 return '<%s/>' % ' '.join((self.tagname, *attributes))

683

684 def __len__(self) -> int:

685 return len(self.children)

686

687 def __contains__(self, key) -> bool:

688 # Test for both, children and attributes with operator ``in``.

689 if isinstance(key, str):

690 return key in self.attributes

691 return key in self.children

692

693 def __getitem__(self, key: str | int | slice) -> Any:

694 if isinstance(key, str):

695 return self.attributes[key]

696 elif isinstance(key, int):

697 return self.children[key]

698 elif isinstance(key, slice):

699 assert key.step in (None, 1), 'cannot handle slice with stride'

700 return self.children[key.start:key.stop]

701 else:

702 raise TypeError('element index must be an integer, a slice, or '

703 'an attribute name string')

704

705 def __setitem__(self, key, item) -> None:

706 if isinstance(key, str):

707 self.attributes[str(key)] = item

708 elif isinstance(key, int):

709 self.setup_child(item)

710 self.children[key] = item

711 elif isinstance(key, slice):

712 assert key.step in (None, 1), 'cannot handle slice with stride'

713 for node in item:

714 self.setup_child(node)

715 self.children[key.start:key.stop] = item

716 else:

717 raise TypeError('element index must be an integer, a slice, or '

718 'an attribute name string')

719

720 def __delitem__(self, key: str | int | slice) -> None:

721 if isinstance(key, str):

722 del self.attributes[key]

723 elif isinstance(key, int):

724 del self.children[key]

725 elif isinstance(key, slice):

726 assert key.step in (None, 1), 'cannot handle slice with stride'

727 del self.children[key.start:key.stop]

728 else:

729 raise TypeError('element index must be an integer, a simple '

730 'slice, or an attribute name string')

731

732 def __add__(self, other: list) -> list:

733 return self.children + other

734

735 def __radd__(self, other: list) -> list:

736 return other + self.children

737

738 def __iadd__(self, other) -> Self:

739 """Append a node or a list of nodes to `self.children`."""

740 if isinstance(other, Node):

741 self.append(other)

742 elif other is not None:

743 self.extend(other)

744 return self

745

746 def astext(self) -> str:

747 return self.child_text_separator.join(

748 [child.astext() for child in self.children])

749

750 def non_default_attributes(self) -> dict[str, Any]:

751 atts = {key: value for key, value in self.attributes.items()

752 if self.is_not_default(key)}

753 return atts

754

755 def attlist(self) -> list[tuple[str, Any]]:

756 return sorted(self.non_default_attributes().items())

757

758 def get(self, key: str, failobj: Any | None = None) -> Any:

759 return self.attributes.get(key, failobj)

760

761 def hasattr(self, attr: str) -> bool:

762 return attr in self.attributes

763

764 def delattr(self, attr: str) -> None:

765 if attr in self.attributes:

766 del self.attributes[attr]

767

768 def setdefault(self, key: str, failobj: Any | None = None) -> Any:

769 return self.attributes.setdefault(key, failobj)

770

771 has_key = hasattr

772

773 def get_language_code(self, fallback: str = '') -> str:

774 """Return node's language tag.

775

776 Look iteratively in self and parents for a class argument

777 starting with ``language-`` and return the remainder of it

778 (which should be a `BCP49` language tag) or the `fallback`.

779 """

780 for cls in self.get('classes', []):

781 if cls.startswith('language-'):

782 return cls.removeprefix('language-')

783 try:

784 return self.parent.get_language_code(fallback)

785 except AttributeError:

786 return fallback

787

788 def append(self, item) -> None:

789 self.setup_child(item)

790 self.children.append(item)

791

792 def extend(self, item: Iterable) -> None:

793 for node in item:

794 self.append(node)

795

796 def insert(self, index: SupportsIndex, item) -> None:

797 if isinstance(item, Node):

798 self.setup_child(item)

799 self.children.insert(index, item)

800 elif item is not None:

801 self[index:index] = item

802

803 def pop(self, i: int = -1):

804 return self.children.pop(i)

805

806 def remove(self, item) -> None:

807 self.children.remove(item)

808

809 def index(self, item, start: int = 0, stop: int = sys.maxsize) -> int:

810 return self.children.index(item, start, stop)

811

812 def previous_sibling(self):

813 """Return preceding sibling node or ``None``."""

814 try:

815 i = self.parent.index(self)

816 except (AttributeError):

817 return None

818 return self.parent[i-1] if i > 0 else None

819

820 def section_hierarchy(self) -> list[section]:

821 """Return the element's section hierarchy.

822

823 Return a list of all <section> elements that contain `self`

824 (including `self` if it is a <section>) and have a parent node.

825

826 List item ``[i]`` is the parent <section> of level i+1

827 (1: section, 2: subsection, 3: subsubsection, ...).

828 The length of the list is the element's section level.

829

830 See `docutils.parsers.rst.states.RSTState.check_subsection()`

831 for a usage example.

832

833 Provisional. May be changed or removed without warning.

834 """

835 sections = []

836 node = self

837 while node.parent is not None:

838 if isinstance(node, section):

839 sections.append(node)

840 node = node.parent

841 sections.reverse()

842 return sections

843

844 def is_not_default(self, key: str) -> bool:

845 if self[key] == [] and key in self.list_attributes:

846 return False

847 else:

848 return True

849

850 def update_basic_atts(self, dict_: Mapping[str, Any] | Element) -> None:

851 """

852 Update basic attributes ('ids', 'names', 'classes',

853 'dupnames', but not 'source') from node or dictionary `dict_`.

854

855 Provisional.

856 """

857 if isinstance(dict_, Node):

858 dict_ = dict_.attributes

859 for att in self.basic_attributes:

860 self.append_attr_list(att, dict_.get(att, []))

861

862 def append_attr_list(self, attr: str, values: Iterable[Any]) -> None:

863 """

864 For each element in values, if it does not exist in self[attr], append

865 it.

866

867 NOTE: Requires self[attr] and values to be sequence type and the

868 former should specifically be a list.

869 """

870 # List Concatenation

871 for value in values:

872 if value not in self[attr]:

873 self[attr].append(value)

874

875 def coerce_append_attr_list(

876 self, attr: str, value: list[Any] | Any) -> None:

877 """

878 First, convert both self[attr] and value to a non-string sequence

879 type; if either is not already a sequence, convert it to a list of one

880 element. Then call append_attr_list.

881

882 NOTE: self[attr] and value both must not be None.

883 """

884 # List Concatenation

885 if not isinstance(self.get(attr), list):

886 self[attr] = [self[attr]]

887 if not isinstance(value, list):

888 value = [value]

889 self.append_attr_list(attr, value)

890

891 def replace_attr(self, attr: str, value: Any, force: bool = True) -> None:

892 """

893 If self[attr] does not exist or force is True or omitted, set

894 self[attr] to value, otherwise do nothing.

895 """

896 # One or the other

897 if force or self.get(attr) is None:

898 self[attr] = value

899

900 def copy_attr_convert(

901 self, attr: str, value: Any, replace: bool = True) -> None:

902 """

903 If attr is an attribute of self, set self[attr] to

904 [self[attr], value], otherwise set self[attr] to value.

905

906 NOTE: replace is not used by this function and is kept only for

907 compatibility with the other copy functions.

908 """

909 if self.get(attr) is not value:

910 self.coerce_append_attr_list(attr, value)

911

912 def copy_attr_coerce(self, attr: str, value: Any, replace: bool) -> None:

913 """

914 If attr is an attribute of self and either self[attr] or value is a

915 list, convert all non-sequence values to a sequence of 1 element and

916 then concatenate the two sequence, setting the result to self[attr].

917 If both self[attr] and value are non-sequences and replace is True or

918 self[attr] is None, replace self[attr] with value. Otherwise, do

919 nothing.

920 """

921 if self.get(attr) is not value:

922 if isinstance(self.get(attr), list) or \

923 isinstance(value, list):

924 self.coerce_append_attr_list(attr, value)

925 else:

926 self.replace_attr(attr, value, replace)

927

928 def copy_attr_concatenate(

929 self, attr: str, value: Any, replace: bool) -> None:

930 """

931 If attr is an attribute of self and both self[attr] and value are

932 lists, concatenate the two sequences, setting the result to

933 self[attr]. If either self[attr] or value are non-sequences and

934 replace is True or self[attr] is None, replace self[attr] with value.

935 Otherwise, do nothing.

936 """

937 if self.get(attr) is not value:

938 if isinstance(self.get(attr), list) and \

939 isinstance(value, list):

940 self.append_attr_list(attr, value)

941 else:

942 self.replace_attr(attr, value, replace)

943

def copy_attr_consistent(
        self, attr: str, value: Any, replace: bool) -> None:
    """
    Replace self[attr] with `value` if `replace` is True or self[attr]
    is None; otherwise leave it alone.

    Nothing is done when self[attr] already is the same object as `value`.
    """
    if self.get(attr) is value:
        return
    self.replace_attr(attr, value, replace)

952

def update_all_atts(self,
                    dict_: Mapping[str, Any] | Element,
                    update_fun: _UpdateFun = copy_attr_consistent,
                    replace: bool = True,
                    and_source: bool = False,
                    ) -> None:
    """
    Update all attributes from the node or dictionary `dict_`.

    The basic attributes ('ids', 'names', 'classes', 'dupnames', but
    not 'source') are appended first.  Every other attribute found in
    `dict_` is then handed to `update_fun`, which decides how a value
    present in both self and `dict_` is merged.  Generally, with
    `replace` True the values in self are replaced by or merged with
    the values in `dict_`; otherwise the values in self may be
    preserved or merged.  With `and_source` True, the 'source'
    attribute takes part in the copy.

    NOTE: When `replace` is False and self has a 'source' attribute,
          'source' is not replaced even if `dict_` has one, though it
          may still be merged into a list depending on `update_fun`.
    NOTE: It is easier to call one of the update-specific methods
          than to pass `update_fun` to this function.
    """
    if isinstance(dict_, Node):
        dict_ = dict_.attributes

    # Decide which keys of `dict_` go through `update_fun`
    # (this is what controls whether 'source' is included).
    wanted = (self.is_not_list_attribute if and_source
              else self.is_not_known_attribute)

    # The basic attributes are always appended.
    self.update_basic_atts(dict_)

    # Everything else that passes the filter is merged via `update_fun`.
    for att in dict_:
        if wanted(att):
            update_fun(self, att, dict_[att], replace)

994

def update_all_atts_consistantly(self,
                                 dict_: Mapping[str, Any] | Element,
                                 replace: bool = True,
                                 and_source: bool = False,
                                 ) -> None:
    """
    Update all attributes from the node or dictionary `dict_`,
    merging with `copy_attr_consistent()`.

    The basic attributes ('ids', 'names', 'classes', 'dupnames', but
    not 'source') are appended.  For any other attribute present in
    both self and `dict_`, the value in self is replaced by the one
    from `dict_` when `replace` is True and preserved otherwise.
    With `and_source` True, the 'source' attribute takes part in the
    copy.

    NOTE: When `replace` is False and self has a 'source' attribute,
          'source' is not replaced even if `dict_` has one.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_consistent,
                         replace=replace,
                         and_source=and_source)

1018

def update_all_atts_concatenating(self,
                                  dict_: Mapping[str, Any] | Element,
                                  replace: bool = True,
                                  and_source: bool = False,
                                  ) -> None:
    """
    Update all attributes from the node or dictionary `dict_`,
    merging with `copy_attr_concatenate()`.

    The basic attributes ('ids', 'names', 'classes', 'dupnames', but
    not 'source') are appended.  For any other attribute present in
    both self and `dict_`:

    - if both values are lists, they are concatenated and the result
      is stored in self;
    - otherwise the value in self is replaced by the one from `dict_`
      when `replace` is True and preserved when it is False.

    With `and_source` True, the 'source' attribute takes part in the
    copy.

    NOTE: When `replace` is False and self has a 'source' attribute,
          'source' is not replaced even if `dict_` has one, though
          two list values would still be concatenated.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_concatenate,
                         replace=replace,
                         and_source=and_source)

1045

def update_all_atts_coercion(self,
                             dict_: Mapping[str, Any] | Element,
                             replace: bool = True,
                             and_source: bool = False,
                             ) -> None:
    """
    Update all attributes from the node or dictionary `dict_`,
    merging with `copy_attr_coerce()`.

    The basic attributes ('ids', 'names', 'classes', 'dupnames', but
    not 'source') are appended.  For any other attribute present in
    both self and `dict_`:

    - if either value is a list, non-lists are first converted to
      1-element lists, then both lists are concatenated and the result
      is stored in self;
    - if neither value is a list, the value in self is replaced by the
      one from `dict_` when `replace` is True and preserved otherwise.

    With `and_source` True, the 'source' attribute takes part in the
    copy.

    NOTE: When `replace` is False and self has a 'source' attribute,
          'source' is not replaced even if `dict_` has one, though it
          may still be merged into a list.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_coerce,
                         replace=replace,
                         and_source=and_source)

1073

def update_all_atts_convert(self,
                            dict_: Mapping[str, Any] | Element,
                            and_source: bool = False,
                            ) -> None:
    """
    Update all attributes from the node or dictionary `dict_`,
    merging with `copy_attr_convert()`.

    The basic attributes ('ids', 'names', 'classes', 'dupnames', but
    not 'source') are appended.  For any other attribute present in
    both self and `dict_`, non-lists are first converted to 1-element
    lists, then the two lists are concatenated and the result is
    stored in self.  With `and_source` True, the 'source' attribute
    takes part in the copy.

    This method has no `replace` argument: `update_all_atts()` is
    called with its default and `copy_attr_convert()` ignores it.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_convert,
                         and_source=and_source)

1097

def clear(self) -> None:
    """Drop all children by rebinding `self.children` to a new, empty list.

    The previous list object is not modified.
    """
    self.children = list()

1100

def replace(self, old, new) -> None:
    """Replace one child `Node` with another child or children.

    `new` may be a single `Node`, a sequence of nodes (spliced in at the
    position of `old`), or None (in which case nothing is replaced).
    """
    position = self.index(old)
    if isinstance(new, Node):
        self.setup_child(new)
        self[position] = new
        return
    if new is None:
        return
    # Anything else is treated as a sequence replacing the one-item slice.
    self[position:position + 1] = new

1109

def replace_self(self, new) -> None:
    """
    Replace `self` node with `new`, where `new` is a node or a
    list of nodes.

    The basic attributes of `self` are handed over to `new` (or to the
    first item of the list `new`) if that is an `Element`.

    Provisional: the handling of node attributes will be revised.
    """
    if isinstance(new, Node):
        heir = new
    else:
        # `new` is a list; its first item (if any) inherits the attributes.
        try:
            heir = new[0]
        except IndexError:
            heir = None
    if isinstance(heir, Element):
        heir.update_basic_atts(self)
    else:
        # `heir` is a Text node or `new` is an empty list.
        # Assert that we aren't losing any attributes.
        for att in self.basic_attributes:
            assert not self[att], \
                'Losing "%s" attribute: %s' % (att, self[att])
    self.parent.replace(self, new)

1133

def first_child_matching_class(self,
                               childclass: type[Element] | type[Text]
                               | tuple[type[Element] | type[Text], ...],
                               start: int = 0,
                               end: int = sys.maxsize,
                               ) -> int | None:
    """
    Return the index of the first child that is an instance of
    `childclass`, or None if there is no such child.

    Matching uses `isinstance()`, so instances of subclasses match, too.

    Parameters:

    - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
      classes. If a tuple, any of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    wanted = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    for index in range(start, stop):
        if isinstance(self[index], wanted):
            return index
    return None

1157

def first_child_not_matching_class(
        self,
        childclass: type[Element] | type[Text]
        | tuple[type[Element] | type[Text], ...],
        start: int = 0,
        end: int = sys.maxsize,
        ) -> int | None:
    """
    Return the index of the first child that is *not* an instance of
    `childclass`, or None if every checked child matches.

    Parameters:

    - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
      classes. If a tuple, none of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    skipped = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    for index in range(start, stop):
        if not isinstance(self.children[index], skipped):
            return index
    return None

1184

def pformat(self, indent: str = ' ', level: int = 0) -> str:
    """Return an indented pseudo-XML representation of the subtree.

    The start tag of `self` is prefixed with `level` copies of `indent`
    and followed by the representation of all children at `level` + 1.
    """
    # NOTE(review): the default for `indent` is copied as shown in this
    # listing; confirm that no whitespace was lost in extraction.
    pieces = ['%s%s\n' % (indent*level, self.starttag())]
    pieces.extend(child.pformat(indent, level+1) for child in self.children)
    return ''.join(pieces)

1189

def copy(self) -> Self:
    """Return a childless copy of `self`.

    A new instance of the same class is created from `rawsource` and the
    attributes of `self`; `_document`, `source` and `line` are carried
    over afterwards.
    """
    duplicate = self.__class__(rawsource=self.rawsource, **self.attributes)
    duplicate._document = self._document
    duplicate.source = self.source
    duplicate.line = self.line
    return duplicate

1196

def deepcopy(self) -> Self:
    """Return a copy of `self` that holds deep copies of all children."""
    duplicate = self.copy()
    cloned_children = [child.deepcopy() for child in self.children]
    duplicate.extend(cloned_children)
    return duplicate

1201

def note_referenced_by(self,
                       name: str | None = None,
                       id: str | None = None,
                       ) -> None:
    """Note that this Element has been referenced by its name
    `name` or id `id`.

    Sets ``self.referenced`` and, if registered, the ``referenced``
    attribute of the nodes waiting for a reference by `name` or `id`.
    """
    self.referenced = True
    # Element.expect_referenced_by_* dictionaries map names or ids
    # to nodes whose ``referenced`` attribute is set to true as
    # soon as this node is referenced by the given name or id.
    # Needed for target propagation.
    name_map = getattr(self, 'expect_referenced_by_name', {})
    id_map = getattr(self, 'expect_referenced_by_id', {})
    waiting_for_name = name_map.get(name)
    waiting_for_id = id_map.get(id)
    if waiting_for_name:
        assert name is not None
        waiting_for_name.referenced = True
    if waiting_for_id:
        assert id is not None
        waiting_for_id.referenced = True

1221

@classmethod
def is_not_list_attribute(cls, attr: str) -> bool:
    """
    Return True if and only if `attr` is NOT listed in
    `cls.list_attributes` (the basic list attributes of the class).
    """
    listed = attr in cls.list_attributes
    return not listed

1229

@classmethod
def is_not_known_attribute(cls, attr: str) -> bool:
    """
    Return True if `attr` is NOT listed in `cls.common_attributes`
    (the attributes defined for all Element instances).

    Provisional. May be removed in Docutils 2.0.
    """
    known = attr in cls.common_attributes
    return not known

1238

def validate_attributes(self) -> None:
    """Normalize and validate element attributes.

    Every attribute value is passed through its entry in
    `ATTRIBUTE_VALIDATORS` and the result is stored back, which converts
    string values to the expected datatype and normalizes values.
    Attributes whose name starts with "internal:" are skipped.

    Raise `ValidationError` (listing all problems found) for invalid
    attributes or attribute values.

    Provisional.
    """
    problems = []
    for key, value in self.attributes.items():
        if key.startswith('internal:'):
            # see docs/user/config.html#expose-internals
            continue
        if key in self.valid_attributes:
            try:
                self.attributes[key] = ATTRIBUTE_VALIDATORS[key](value)
            except (ValueError, TypeError, KeyError) as e:
                problems.append(
                    f'Attribute "{key}" has invalid value "{value}".\n {e}')
        else:
            va = '", "'.join(self.valid_attributes)
            problems.append(f'Attribute "{key}" not one of "{va}".')
    if not problems:
        return
    raise ValidationError(f'Element {self.starttag()} invalid:\n '
                          + '\n '.join(problems),
                          problematic_element=self)

1266

def validate_content(self,
                     model: _ContentModelTuple | None = None,
                     elements: Sequence | None = None,
                     ) -> list:
    """Test compliance of `elements` with `model`.

    :model: content model description, default `self.content_model`,
            an iterable of ``(category, quantifier)`` pairs with the
            quantifiers '.', '?', '+' or '*'.
    :elements: list of doctree elements, default `self.children`.

    Return list of children that do not fit in the model or raise
    `ValidationError` if the content does not comply with the `model`.

    Provisional.
    """
    if model is None:
        model = self.content_model
    if elements is None:
        elements = self.children
    pending = iter(elements)
    child = next(pending, None)
    for category, quantifier in model:
        if not isinstance(child, category):
            if quantifier in ('.', '+'):
                # a required child is missing or of the wrong kind
                raise ValidationError(self._report_child(child, category),
                                      problematic_element=child)
            # quantifier in ('?', '*') -> optional child:
            # try same child with next part of content model
            continue
        # Check additional placement constraints (if applicable):
        child.validate_position()
        # advance:
        if quantifier in ('.', '?'):  # go to next element
            child = next(pending, None)
            continue
        # quantifier in ('*', '+'): pass all matching elements
        for child in pending:
            if not isinstance(child, category):
                break
            try:
                child.validate_position()
            except AttributeError:
                pass
        else:
            child = None  # ran out of elements
    if child is None:
        return []
    return [child, *pending]

1311

1312 def _report_child(self,

1313 child,

1314 category: Element | Iterable[Element],

1315 ) -> str:

1316 # Return a str reporting a missing child or child of wrong category.

1317 try:

1318 _type = category.__name__

1319 except AttributeError:

1320 _type = '> or <'.join(c.__name__ for c in category)

1321 msg = f'Element {self.starttag()} invalid:\n'

1322 if child is None:

1323 return f'{msg} Missing child of type <{_type}>.'

1324 if isinstance(child, Text):

1325 return (f'{msg} Expecting child of type <{_type}>, '

1326 f'not text data "{child.astext()}".')

1327 return (f'{msg} Expecting child of type <{_type}>, '

1328 f'not {child.starttag()}.')

1329

def validate(self, recursive: bool = True) -> None:
    """Validate Docutils Document Tree element ("doctree").

    Checks the attributes and the content of the element.
    Raise ValidationError if there are violations.
    If `recursive` is True, validate also the element's descendants.

    See `The Docutils Document Tree`__ for details of the
    Docutils Document Model.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html

    Provisional (work in progress).
    """
    self.validate_attributes()

    # Any child left over by the content model check is an error;
    # the first one is reported.
    for stray in self.validate_content():
        if isinstance(stray, Text):
            message = (f'Element {self.starttag()} invalid:\n'
                       f' Spurious text: "{stray.astext()}".')
            culprit = self
        else:
            message = (f'Element {self.starttag()} invalid:\n'
                       f' Child element {stray.starttag()} '
                       'not allowed at this position.')
            culprit = stray
        raise ValidationError(message, problematic_element=culprit)

    if not recursive:
        return
    for child in self:
        child.validate(recursive=recursive)

1360

1361

1362# ====================

1363# Element Categories

1364# ====================

1365#

1366# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-hierarchy.

1367

class Root:
    """Category class: the element at the root of a document tree."""

1370

1371

class Structural:
    """Category class for `structural elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-elements
    """

1378

1379

class SubStructural:
    """Category class for `structural subelements`__.

    Structural subelements are children of `Structural` elements;
    most Structural elements accept only specific `SubStructural`
    elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-subelements
    """

1388

1389

class Bibliographic:
    """Category class for `bibliographic elements`__
    (displayed document meta-data).

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #bibliographic-elements
    """

1396

1397

class Body:
    """Category class for `body elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-elements
    """

1403

1404

class Admonition(Body):
    """Category class for admonitions
    (distinctive and self-contained notices).

    The content consists of one or more `Body` elements.
    """
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

1408

1409

class Sequential(Body):
    """Category class for list-like body elements."""

1412

1413

class General(Body):
    """Category class for miscellaneous body elements."""

1416

1417

class Special(Body):
    """Category class for special internal body elements."""

1420

1421

class Part:
    """Category class for `body subelements`__, which always occur
    within specific parent elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-subelements
    """

1427

1428

class Decorative:
    """Category class for decorative elements (`header` and `footer`).

    Decorative elements are children of `decoration`; their content
    consists of one or more `Body` elements.
    """
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

1435

1436

class Inline:
    """Category class for inline elements, which contain text data
    and possibly other inline elements.
    """

1440

1441

1442# Orthogonal categories and Mixins

1443# ================================

1444

class PreBibliographic:
    """Category class for elements which may occur before
    Bibliographic Elements."""

1447

1448

class Invisible(Special, PreBibliographic):
    """Category class for internal elements that don't appear in output."""

1451

1452

class Labeled:
    """Category class for elements that contain a `label` as their
    first element."""

1455

1456

class Resolvable:
    """Mixin providing the `resolved` flag (False by default)."""

    resolved: bool = False

1459

1460

class BackLinkable:
    """Mixin for Elements that accept a "backrefs" attribute.

    Extends the list attributes and the valid attributes of `Element`
    by 'backrefs'.
    """

    list_attributes: Final = Element.list_attributes + ('backrefs',)
    valid_attributes: Final = Element.valid_attributes + ('backrefs',)

    def add_backref(self: Element, refid: str) -> None:
        """Append `refid` to the "backrefs" attribute list."""
        backrefs = self['backrefs']
        backrefs.append(refid)

1469

1470

class Referential(Resolvable):
    """Category class for elements holding a cross-reference
    (outgoing hyperlink)."""

1473

1474

class Targetable(Resolvable):
    """Category class for cross-reference targets (incoming hyperlink)."""

    # Default for instances that have not been referenced.
    referenced: int = 0

    indirect_reference_name: str | None = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.

    This was required for MoinMoin <= 1.9 compatibility.

    Deprecated, will be removed in Docutils 1.0.
    """

1486

1487

class Titular:
    """Category class for a title, sub-title, or informal heading
    (rubric)."""

1490

1491

class TextElement(Element):
    """
    An element which directly contains text.

    All children are `Text` or `Inline` subclass nodes.  To find out
    whether an element's context is inline, check whether its immediate
    parent is a `TextElement` instance (including subclasses).  This is
    handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """
    content_model: Final = (((Text, Inline), '*'),)
    # (#PCDATA | %inline.elements;)*

    child_text_separator: Final = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize the element; a non-empty `text` becomes a `Text`
        node placed in front of the other `children`."""
        if text:
            children = (Text(text), *children)
        Element.__init__(self, rawsource, *children, **attributes)

1523

1524

class FixedTextElement(TextElement):
    """An element which directly contains preformatted text.

    Every instance gets the attribute ``xml:space="preserve"``.
    """

    valid_attributes: Final = Element.valid_attributes + ('xml:space',)

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize like `TextElement`, then force 'xml:space'
        to 'preserve' (overriding a value passed in `attributes`)."""
        super().__init__(rawsource, text, *children, **attributes)
        self.attributes['xml:space'] = 'preserve'

1538

1539

class PureTextElement(TextElement):
    """An element which only contains text, no children.

    The content model allows at most one `Text` node.
    """
    content_model: Final = ((Text, '?'),)  # (#PCDATA)

1543

1544

1545# =================================

1546# Concrete Document Tree Elements

1547# =================================

1548#

1549# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-reference

1550

1551# Decorative Elements

1552# ===================

1553

class header(Decorative, Element):
    """Decorative element `header`."""

class footer(Decorative, Element):
    """Decorative element `footer`."""

1556

1557

1558# Structural Subelements

1559# ======================

1560

class title(Titular, PreBibliographic, SubStructural, TextElement):
    """Title of `document`, `section`, `topic` and generic `admonition`.

    Accepts the attributes 'auto' and 'refid' in addition to the
    attributes valid for every `Element`.
    """
    valid_attributes: Final = Element.valid_attributes + ('auto', 'refid')

1565

1566

class subtitle(Titular, PreBibliographic, SubStructural, TextElement):
    """Sub-title of `document`, `section` and `sidebar`."""

    def validate_position(self) -> None:
        """Check position of subtitle: must follow a title.

        Raise `ValidationError` if the subtitle is the first child of
        its parent.
        """
        if not self.parent:
            return
        if self.parent.index(self) != 0:
            return
        raise ValidationError(f'Element {self.parent.starttag()} invalid:'
                              '\n <subtitle> only allowed after <title>.',
                              problematic_element=self)

1576

1577

class meta(PreBibliographic, SubStructural, Element):
    """Container for "invisible" bibliographic data, or meta-data.

    Accepts the attributes 'content', 'dir', 'http-equiv', 'lang',
    'media', 'name', and 'scheme' in addition to the attributes valid
    for every `Element`.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'content', 'dir', 'http-equiv', 'lang', 'media', 'name', 'scheme')

1582

1583

class docinfo(SubStructural, Element):
    """Container for displayed document meta-data.

    The content consists of one or more `Bibliographic` elements.
    """
    content_model: Final = ((Bibliographic, '+'),)
    # (%bibliographic.elements;)+

1588

1589

class decoration(PreBibliographic, SubStructural, Element):
    """Container for `header` and `footer`."""
    content_model: Final = ((header, '?'),  # Empty element doesn't make sense,
                            (footer, '?'),  # but is simpler to define.
                            )
    # (header?, footer?)

    def get_header(self) -> header:
        """Return the `header` child.

        A new, empty `header` is inserted at index 0 if the first child
        is not a `header` (or there are no children).
        """
        first = self.children[0] if len(self.children) else None
        if not isinstance(first, header):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self) -> footer:
        """Return the `footer` child.

        A new, empty `footer` is appended if the last child is not a
        `footer` (or there are no children).
        """
        last = self.children[-1] if len(self.children) else None
        if not isinstance(last, footer):
            self.append(footer())
        return self.children[-1]

1606

1607

class transition(SubStructural, Element):
    """Transitions__ are breaks between untitled text parts.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition
    """

    def validate_position(self) -> None:
        """Check additional constraints on `transition` placement.

        A transition may not begin or end a section or document,
        nor may two transitions be immediately adjacent.

        Raise `ValidationError` listing all violated constraints.
        """
        heading = f'Element {self.parent.starttag()} invalid:'
        predecessor = self.previous_sibling()
        problems = []
        # A transition following a title, subtitle, meta, or decoration
        # element still counts as "at the beginning of a document or
        # section".
        at_start = (predecessor is None  # index == 0
                    or isinstance(predecessor,
                                  (title, subtitle, meta, decoration)))
        if at_start:
            problems.append(
                '<transition> may not begin a section or document.')
        if self.parent.index(self) == len(self.parent) - 1:
            problems.append('<transition> may not end a section or document.')
        if isinstance(predecessor, transition):
            problems.append(
                '<transition> may not directly follow another transition.')
        if problems:
            raise ValidationError('\n '.join([heading, *problems]),
                                  problematic_element=self)

1637

1638

1639# Structural Elements

1640# ===================

1641

class topic(Structural, Element):
    """
    Topics__ are non-recursive, mini-sections.

    The content is an optional `title` followed by one or more `Body`
    elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#topic
    """
    content_model: Final = ((title, '?'), (Body, '+'))
    # (title?, (%body.elements;)+)

1650

1651

class sidebar(Structural, Element):
    """
    Sidebars__ are like parallel documents providing related material.

    A sidebar is typically offset by a border and "floats" to the side
    of the page.

    The content is an optional `title`, an optional `subtitle`, and one
    or more `topic` or `Body` elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#sidebar
    """
    content_model: Final = ((title, '?'),
                            (subtitle, '?'),
                            ((topic, Body), '+'),
                            )
    # ((title, subtitle?)?, (%body.elements; | topic)+)
    # "subtitle only after title" is ensured in `subtitle.validate_position()`.

1667

1668

class section(Structural, Element):
    """Document section__. The main unit of hierarchy.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#section
    """
    # The content model is recursive (it refers to `section` itself)
    # and is therefore assigned after the class definition, see below.

1675

1676

# Assigned after the class definition because the model refers to
# `section` itself (sections may contain sub-sections).
section.content_model = ((title, '.'),
                         (subtitle, '?'),
                         ((Body, topic, sidebar, transition), '*'),
                         ((section, transition), '*'),
                         )
# (title, subtitle?, %structure.model;)
# Correct transition placement is ensured in `transition.validate_position()`.

1684

1685

1686# Root Element

1687# ============

1688

1689class document(Root, Element):

1690 """

1691 The document root element.

1692

1693 Do not instantiate this class directly; use

1694 `docutils.utils.new_document()` instead.

1695 """

1696 valid_attributes: Final = Element.valid_attributes + ('title',)

1697 content_model: Final = ((title, '?'),

1698 (subtitle, '?'),

1699 (meta, '*'),

1700 (decoration, '?'),

1701 (docinfo, '?'),

1702 (transition, '?'),

1703 ((Body, topic, sidebar, transition), '*'),

1704 ((section, transition), '*'),

1705 )

1706 # ( (title, subtitle?)?,

1707 # meta*,

1708 # decoration?,

1709 # (docinfo, transition?)?,

1710 # %structure.model; )

1711 # Additional restrictions for `subtitle` and `transition` are tested

1712 # with the respective `validate_position()` methods.

1713

def __init__(self,
             settings: Values,
             reporter: Reporter,
             *args,
             **kwargs: Any,
             ) -> None:
    """
    Set up the document root and its (initially empty) registries.

    :settings: runtime settings record, stored as `self.settings`.
    :reporter: system message generator, stored as `self.reporter`.

    Remaining positional and keyword arguments are handed on to
    `Element.__init__()`.
    """
    Element.__init__(self, *args, **kwargs)

    self.current_source: StrPath | None = None
    """Path to or description of the input source being processed."""

    self.current_line: int | None = None
    """Line number (1-based) of `current_source`."""

    self.settings: Values = settings
    """Runtime settings data record."""

    self.reporter: Reporter = reporter
    """System message generator."""

    self.indirect_targets: list[target] = []
    """List of indirect target nodes."""

    self.substitution_defs: dict[str, substitution_definition] = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names: dict[str, str] = {}
    """Mapping of case-normalized to case-sensitive substitution names."""

    self.refnames: dict[str, list[Element]] = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids: dict[str, list[Element]] = {}
    """Mapping of ids to lists of referencing nodes."""

    self.nameids: dict[str, str] = {}
    """Mapping of names to unique id's."""

    self.nametypes: dict[str, bool] = {}
    """Mapping of names to hyperlink type. True: explicit, False: implicit.
    """

    self.ids: dict[str, Element] = {}
    """Mapping of ids to nodes."""

    self.footnote_refs: dict[str, list[footnote_reference]] = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs: dict[str, list[citation_reference]] = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes: list[footnote] = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs: list[footnote_reference] = []
    """List of auto-numbered footnote_reference nodes."""

    self.symbol_footnotes: list[footnote] = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs: list[footnote_reference] = []
    """List of symbol footnote_reference nodes."""

    self.footnotes: list[footnote] = []
    """List of manually-numbered footnote nodes."""

    self.citations: list[citation] = []
    """List of citation nodes."""

    self.autofootnote_start: int = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnote_start: int = 0
    """Initial symbol footnote symbol index."""

    # Keyed by ID prefix (a string), see `set_id()`.
    self.id_counter: Counter[str] = Counter()
    """Numbers added to otherwise identical IDs."""

    self.parse_messages: list[system_message] = []
    """System messages generated while parsing."""

    self.transform_messages: list[system_message] = []
    """System messages generated while applying transforms."""

    # Delayed import (see the note next to the imports at the top of
    # the module).
    import docutils.transforms
    self.transformer: Transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.include_log: list[tuple[StrPath, tuple]] = []
    """The current source's parents (to detect inclusion loops)."""

    self.decoration: decoration | None = None
    """Document's `decoration` node."""

    # The root node is its own document.
    self._document: document = self

1809

1810 def __getstate__(self) -> dict[str, Any]:

1811 """

1812 Return dict with unpicklable references removed.

1813 """

1814 state = self.__dict__.copy()

1815 state['reporter'] = None

1816 state['transformer'] = None

1817 return state

1818

def asdom(self, dom: ModuleType | None = None) -> minidom.Document:
    """Return a DOM representation of this document.

    `dom` is the DOM implementation module to use; it defaults to
    `xml.dom.minidom`, which is only imported when needed.
    """
    if dom is None:
        import xml.dom.minidom as dom
    dom_document = dom.Document()
    root_node = self._dom_node(dom_document)
    dom_document.appendChild(root_node)
    return dom_document

1826

def set_id(self,
           node: Element,
           msgnode: Element | None = None,
           suggested_prefix: str = '',
           ) -> str:
    """
    Register the IDs of `node` or generate, set, and register a new one.

    If `node` already has IDs, each one is registered in `self.ids`;
    for an ID already taken by another node an error is reported (and
    added to `msgnode`, if given).  The last ID of `node` is returned.

    Otherwise an ID is derived from the first suitable name of `node`
    or, failing that, built from a prefix and a running number; it is
    appended to ``node['ids']``, registered, and returned.
    """
    existing = node['ids']
    if existing:
        # register and check for duplicates
        for known_id in existing:
            self.ids.setdefault(known_id, node)
            if self.ids[known_id] is not node:
                msg = self.reporter.error(f'Duplicate ID: "{known_id}" used by '
                                          f'{self.ids[known_id].starttag()} '
                                          f'and {node.starttag()}',
                                          base_node=node)
                if msgnode is not None:
                    msgnode += msg
        return known_id
    # generate and set id
    id_prefix = self.settings.id_prefix
    auto_id_prefix = self.settings.auto_id_prefix
    base_id = ''
    new_id = ''
    for name in node['names']:
        if id_prefix:  # allow names starting with numbers
            base_id = make_id('x'+name)[1:]
        else:
            base_id = make_id(name)
        # TODO: normalize id-prefix? (would make code simpler)
        new_id = id_prefix + base_id
        if base_id and new_id not in self.ids:
            break  # found an unused, name-derived ID
    else:
        # No usable name-derived ID: number the ID.
        if base_id and auto_id_prefix.endswith('%'):
            # disambiguate name-derived ID
            # TODO: remove second condition after announcing change
            prefix = new_id + '-'
        else:
            prefix = id_prefix + auto_id_prefix
            if prefix.endswith('%'):
                stem = suggested_prefix or make_id(node.tagname)
                prefix = f'{prefix[:-1]}{stem}-'
        counter = self.id_counter
        while True:
            counter[prefix] += 1
            new_id = f'{prefix}{counter[prefix]}'
            if new_id not in self.ids:
                break
    node['ids'].append(new_id)
    self.ids[new_id] = node
    return new_id

1876

def set_name_id_map(self,
                    node: Element,
                    id: str,
                    msgnode: Element | None = None,
                    explicit: bool = False,
                    ) -> None:
    """
    Update the name/id mappings.

    `self.nameids` maps names to IDs. The value ``None`` indicates
    that the name is a "dupname" (i.e. there are already at least
    two targets with the same name and type).

    `self.nametypes` maps names to booleans representing
    hyperlink target type (True==explicit, False==implicit).

    The following state transition table shows how `self.nameids` items
    ("id") and `self.nametypes` items ("type") change with new input
    (a call to this method), and what actions are performed:

    ========  ====  ========  ====  ========  ========  =======  ======
    Input     Old State       New State       Action             Notes
    --------  --------------  --------------  -----------------  ------
    type      id    type      id    type      dupname   report
    ========  ====  ========  ====  ========  ========  =======  ======
    explicit                  new   explicit
    implicit                  new   implicit
    explicit  old   explicit  None  explicit  new,old   WARNING  [#ex]_
    implicit  old   explicit  old   explicit  new       INFO     [#ex]_
    explicit  old   implicit  new   explicit  old       INFO     [#ex]_
    implicit  old   implicit  None  implicit  new,old   INFO     [#ex]_
    explicit  None  explicit  None  explicit  new       WARNING
    implicit  None  explicit  None  explicit  new       INFO
    explicit  None  implicit  new   explicit
    implicit  None  implicit  None  implicit  new       INFO
    ========  ====  ========  ====  ========  ========  =======  ======

    .. [#ex] Do not clear the name-to-id map or invalidate the old target
       if both old and new targets refer to identical URIs or reference
       names. The new target is invalidated regardless.

    Provisional. There will be changes to prefer explicit reference names
    as base for an element's ID.
    """
    # Iterate over a snapshot: handling a duplicate modifies node['names'].
    for name in tuple(node['names']):
        if name not in self.nameids:
            # first target with this name: just record it
            self.nameids[name] = id
            self.nametypes[name] = explicit
            continue
        self.set_duplicate_name_id(node, id, name, msgnode, explicit)

1928

def set_duplicate_name_id(self,
                          node: Element,
                          id: str,
                          name: str,
                          msgnode: Element | None,
                          explicit: bool,
                          ) -> None:
    """Handle a `name` that is already registered in `self.nameids`.

    `node` is the new target carrying `name`, `id` its ID, and
    `explicit` tells whether it is an explicit target.

    Depending on the old and new target type, the new and/or the old
    node are passed to `dupname()`, ``self.nameids[name]`` is kept,
    replaced by `id`, or set to ``None``, and a system message of level
    1 (info) or 2 (warning) is generated.  ``self.nametypes[name]``
    becomes true if either the old or the new target is explicit.

    The message is appended to `msgnode` (if not ``None``) but removed
    again if `msgnode` no longer validates with it.
    """
    old_id = self.nameids[name]  # None if name is only dupname
    old_explicit = self.nametypes[name]
    # `dict.get`: `old_id` may be None (and then has no registered node)
    old_node = self.ids.get(old_id)
    level = 0  # system message level: 1-info, 2-warning

    self.nametypes[name] = old_explicit or explicit

    if old_id is not None and (
        'refname' in node and node['refname'] == old_node.get('refname')
        or 'refuri' in node and node['refuri'] == old_node.get('refuri')
    ):
        # indirect targets with same reference -> keep old target
        level = 1
        ref = node.get('refuri') or node.get('refname')
        s = f'Duplicate name "{name}" for external target "{ref}".'
        dupname(node, name)
    elif explicit:
        if old_explicit:
            level = 2
            s = f'Duplicate explicit target name: "{name}".'
            dupname(node, name)
            if old_id is not None:
                dupname(old_node, name)
                self.nameids[name] = None
        else:  # new explicit, old implicit -> override
            self.nameids[name] = id
            if old_id is not None:
                level = 1
                s = f'Target name overrides implicit target name "{name}".'
                dupname(old_node, name)
    else:  # new name is implicit
        level = 1
        s = f'Duplicate implicit target name: "{name}".'
        dupname(node, name)
        if old_id is not None and not old_explicit:
            dupname(old_node, name)
            self.nameids[name] = None

    # `s` is assigned in every branch that sets a non-zero `level`.
    if level:
        backrefs = [id]
        # don't add backref id for empty targets (not shown in output)
        if isinstance(node, target) and 'refuri' in node:
            backrefs = []
        msg = self.reporter.system_message(level, s,
                                           backrefs=backrefs,
                                           base_node=node)
        # try appending near to the problem:
        if msgnode is not None:
            msgnode += msg
            try:
                msgnode.validate(recursive=False)
            except ValidationError:
                # detach -> will be handled by `Messages` transform
                msgnode.pop()
                msg.parent = None

1991

def has_name(self, name: str) -> bool:
    """Return True if `name` is a key of `self.nameids`."""
    return name in self.nameids

1994

1995 # "note" here is an imperative verb: "take note of".

def note_implicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Register `target` as implicit target.

    Make sure the target has an ID (`set_id()`), then update the
    name/id mappings with ``explicit=False``.  `msgnode` is passed on
    to both calls.
    """
    # TODO: Postpone ID creation. Register reference name instead of ID
    # to allow for IDs based on explicit target pointing to the same
    # element. https://github.com/sphinx-doc/sphinx/issues/1961
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=False)

2003

def note_explicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Register `target` as explicit target.

    Make sure the target has an ID (`set_id()`), then update the
    name/id mappings with ``explicit=True``.  `msgnode` is passed on
    to both calls.
    """
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=True)

2008

def note_refname(self, node: Element) -> None:
    """Append `node` to the `self.refnames` list for its "refname"."""
    nodes_for_name = self.refnames.setdefault(node['refname'], [])
    nodes_for_name.append(node)

2011

def note_refid(self, node: Element) -> None:
    """Append `node` to the `self.refids` list for its "refid"."""
    nodes_for_id = self.refids.setdefault(node['refid'], [])
    nodes_for_id.append(node)

2014

def note_indirect_target(self, target: target) -> None:
    """Append `target` to `self.indirect_targets`.

    If the target has names, it is also registered with `note_refname()`
    (which reads the target's "refname" attribute).
    """
    self.indirect_targets.append(target)
    if target['names']:
        self.note_refname(target)

2019

def note_anonymous_target(self, target: target) -> None:
    """Make sure `target` has a registered ID (see `set_id()`)."""
    self.set_id(target)

2022

def note_autofootnote(self, footnote: footnote) -> None:
    """Set an ID for `footnote` and append it to `self.autofootnotes`."""
    self.set_id(footnote)
    self.autofootnotes.append(footnote)

2026

def note_autofootnote_ref(self, ref: footnote_reference) -> None:
    """Set an ID for `ref` and append it to `self.autofootnote_refs`."""
    self.set_id(ref)
    self.autofootnote_refs.append(ref)

2030

def note_symbol_footnote(self, footnote: footnote) -> None:
    """Set an ID for `footnote`; append it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    self.symbol_footnotes.append(footnote)

2034

def note_symbol_footnote_ref(self, ref: footnote_reference) -> None:
    """Set an ID for `ref`; append it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    self.symbol_footnote_refs.append(ref)

2038

def note_footnote(self, footnote: footnote) -> None:
    """Set an ID for `footnote` and append it to `self.footnotes`."""
    self.set_id(footnote)
    self.footnotes.append(footnote)

2042

def note_footnote_ref(self, ref: footnote_reference) -> None:
    """Register the footnote reference `ref`.

    Set an ID, append `ref` to the `self.footnote_refs` list for its
    "refname", and register it with `note_refname()`.
    """
    self.set_id(ref)
    refs_for_name = self.footnote_refs.setdefault(ref['refname'], [])
    refs_for_name.append(ref)
    self.note_refname(ref)

2047

def note_citation(self, citation: citation) -> None:
    """Append `citation` to `self.citations`."""
    self.citations.append(citation)

2050

def note_citation_ref(self, ref: citation_reference) -> None:
    """Register the citation reference `ref`.

    Set an ID, append `ref` to the `self.citation_refs` list for its
    "refname", and register it with `note_refname()`.
    """
    self.set_id(ref)
    refs_for_name = self.citation_refs.setdefault(ref['refname'], [])
    refs_for_name.append(ref)
    self.note_refname(ref)

2055

def note_substitution_def(self,
                          subdef: substitution_definition,
                          def_name: str,
                          msgnode: Element | None = None,
                          ) -> None:
    """Register the substitution definition `subdef` under `def_name`.

    The name is whitespace-normalized.  If a definition with that name
    exists already, an error is reported (and appended to `msgnode`,
    if given) and the old definition is passed to `dupname()`.
    """
    name = whitespace_normalize_name(def_name)
    if name in self.substitution_defs:
        msg = self.reporter.error(
            f'Duplicate substitution definition name: "{name}".',
            base_node=subdef)
        if msgnode is not None:
            msgnode += msg
        dupname(self.substitution_defs[name], name)
    # keep only the last definition:
    self.substitution_defs[name] = subdef
    # case-insensitive mapping:
    self.substitution_names[fully_normalize_name(name)] = name

2074

def note_substitution_ref(self,
                          subref: substitution_reference,
                          refname: str,
                          ) -> None:
    """Store the whitespace-normalized `refname` in ``subref['refname']``.
    """
    subref['refname'] = whitespace_normalize_name(refname)

2080

def note_pending(
        self, pending: pending, priority: int | None = None) -> None:
    """Pass `pending` and `priority` to `self.transformer.add_pending()`.
    """
    self.transformer.add_pending(pending, priority)

2084

def note_parse_message(self, message: system_message) -> None:
    """Append `message` to `self.parse_messages`."""
    self.parse_messages.append(message)

2087

def note_transform_message(self, message: system_message) -> None:
    """Append `message` to `self.transform_messages`."""
    self.transform_messages.append(message)

2090

def note_source(self,
                source: StrPath | None,
                offset: int | None,
                ) -> None:
    """Record the current source and line.

    `self.current_source` becomes ``os.fspath(source)``; a false
    `source` (e.g. ``None``) is stored unchanged.
    `self.current_line` becomes ``offset + 1``, or ``None`` if `offset`
    is ``None``.
    """
    self.current_source = os.fspath(source) if source else source
    self.current_line = None if offset is None else offset + 1

2100

def copy(self) -> Self:
    """Return a new instance of the same class without the children.

    The new object is created from this object's settings, reporter,
    and attributes; `source` and `line` are carried over.
    """
    clone = self.__class__(self.settings, self.reporter,
                           **self.attributes)
    clone.source, clone.line = self.source, self.line
    return clone

2107

def get_decoration(self) -> decoration:
    """Return the document's `decoration` node, creating it on demand.

    A new `decoration` element is inserted before the first child that
    is neither `Titular` nor `meta`, or appended if there is no such
    child.
    """
    if self.decoration:
        return self.decoration
    self.decoration: decoration = decoration()
    position = self.first_child_not_matching_class((Titular, meta))
    if position is None:
        self.append(self.decoration)
    else:
        self.insert(position, self.decoration)
    return self.decoration

2117

2118

2119# Bibliographic Elements

2120# ======================

2121

class author(Bibliographic, TextElement):
    """Bibliographic text element ``author``."""


class organization(Bibliographic, TextElement):
    """Bibliographic text element ``organization``."""


class address(Bibliographic, FixedTextElement):
    """Bibliographic element ``address`` (a `FixedTextElement`)."""


class contact(Bibliographic, TextElement):
    """Bibliographic text element ``contact``."""


class version(Bibliographic, TextElement):
    """Bibliographic text element ``version``."""


class revision(Bibliographic, TextElement):
    """Bibliographic text element ``revision``."""


class status(Bibliographic, TextElement):
    """Bibliographic text element ``status``."""


class date(Bibliographic, TextElement):
    """Bibliographic text element ``date``."""


class copyright(Bibliographic, TextElement):  # NoQA: A001 (builtin name)
    """Bibliographic text element ``copyright``."""

2131

2132

class authors(Bibliographic, Element):
    """Container for author information for documents with multiple authors.
    """
    content_model: Final = ((author, '+'),
                            (organization, '?'),
                            (address, '?'),
                            (contact, '?'),
                            )
    # (author, organization?, address?, contact?)+

    def validate_content(self,
                         model: _ContentModelTuple | None = None,
                         elements: Sequence | None = None,
                         ) -> list:
        """Validate the content against repetitions of the content model.

        The inherited validation is called with its defaults first and
        then again with the elements it returned, until nothing is left
        over.  Return the (empty) remainder.

        The arguments `model` and `elements` are accepted for signature
        compatibility but not passed on.

        Provisional.
        """
        leftover = super().validate_content()
        while leftover:
            leftover = super().validate_content(elements=leftover)
        return leftover

2155

2156

2157# Body Elements

2158# =============

2159#

2160# General

2161# -------

2162#

2163# Miscellaneous Body Elements and related Body Subelements (Part)

2164

class paragraph(General, TextElement):
    """General text element ``paragraph``."""


class rubric(Titular, General, TextElement):
    """General text element ``rubric``; also a `Titular` element."""

2167

2168

class compound(General, Element):
    """General element containing one or more body elements."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

2171

2172

class container(General, Element):
    """General element containing one or more body elements."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

2175

2176

class attribution(Part, TextElement):
    """Visible reference to the source of a `block_quote`.

    Optional last child of a `block_quote` (see its content model).
    """

2179

2180

class block_quote(General, Element):
    """An extended quotation, set off from the main text.

    Content: one or more body elements, optionally followed by an
    `attribution`.
    """
    content_model: Final = ((Body, '+'), (attribution, '?'))
    # ((%body.elements;)+, attribution?)

2185

2186

class reference(General, Inline, Referential, TextElement):
    """Referential text element; both a `General` and an `Inline` element.

    Accepts the attributes "anonymous", "name", "refid", "refname", and
    "refuri" in addition to those valid for every `Element`.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'name', 'refid', 'refname', 'refuri')

2190

2191

2192# Lists

2193# -----

2194#

2195# Lists (Sequential) and related Body Subelements (Part)

2196

class list_item(Part, Element):
    """Item of a `bullet_list` or `enumerated_list`.

    Content: zero or more body elements.
    """
    content_model: Final = ((Body, '*'),)  # (%body.elements;)*

2199

2200

class bullet_list(Sequential, Element):
    """Sequential element containing one or more `list_item` elements.

    Accepts the additional attribute "bullet".
    """
    valid_attributes: Final = Element.valid_attributes + ('bullet',)
    content_model: Final = ((list_item, '+'),)  # (list_item+)

2204

2205

class enumerated_list(Sequential, Element):
    """Sequential element containing one or more `list_item` elements.

    Accepts the additional attributes "enumtype", "prefix", "suffix",
    and "start".
    """
    valid_attributes: Final = Element.valid_attributes + (
        'enumtype', 'prefix', 'suffix', 'start')
    content_model: Final = ((list_item, '+'),)  # (list_item+)

2210

2211

class term(Part, TextElement):
    """Text element ``term``, part of a `definition_list_item`."""


class classifier(Part, TextElement):
    """Text element ``classifier``, part of a `definition_list_item`."""

2214

2215

class definition(Part, Element):
    """Definition of a `term` in a `definition_list`.

    Content: one or more body elements.
    """
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

2219

2220

class definition_list_item(Part, Element):
    """Item of a `definition_list`.

    Content: a `term`, any number of further `classifier` or `term`
    elements, and finally one `definition`.
    """
    content_model: Final = ((term, '.'),
                            ((classifier, term), '*'),
                            (definition, '.'),
                            )
    # ((term, classifier*)+, definition)

2227

2228

class definition_list(Sequential, Element):
    """List of terms and their definitions.

    Can be used for glossaries or dictionaries, to describe or
    classify things, for dialogues, or to itemize subtopics.

    Content: one or more `definition_list_item` elements.
    """
    content_model: Final = ((definition_list_item, '+'),)
    # (definition_list_item+)

2237

2238

class field_name(Part, TextElement):
    """Text element ``field_name``, first child of a `field`."""

2240

2241

class field_body(Part, Element):
    """Second child of a `field`; contains zero or more body elements."""
    content_model: Final = ((Body, '*'),)  # (%body.elements;)*

2244

2245

class field(Part, Bibliographic, Element):
    """Pair of exactly one `field_name` and one `field_body`."""
    content_model: Final = ((field_name, '.'), (field_body, '.'))
    # (field_name, field_body)

2249

2250

class field_list(Sequential, Element):
    """List of label & data pairs.

    Typically rendered as a two-column list.
    Also used for extension syntax or special processing.

    Content: one or more `field` elements.
    """
    content_model: Final = ((field, '+'),)  # (field+)

2258

2259

class option_string(Part, PureTextElement):
    """A literal command-line option. Typically monospaced.

    First child of an `option` element (see its content model).
    """

2262

2263

class option_argument(Part, PureTextElement):
    """Placeholder text for option arguments.

    Accepts the additional attribute "delimiter".
    """
    valid_attributes: Final = Element.valid_attributes + ('delimiter',)

    def astext(self) -> str:
        """Return the text content, preceded by the delimiter.

        The delimiter is the value of the "delimiter" attribute
        (default: a space).
        """
        delimiter = self.get('delimiter', ' ')
        return delimiter + TextElement.astext(self)

2270

2271

class option(Part, Element):
    """Option element in an `option_list_item`.

    Groups an option string with zero or more option argument placeholders.

    The text of the children is joined without separator
    (`child_text_separator` is the empty string).
    """
    child_text_separator: Final = ''
    content_model: Final = ((option_string, '.'), (option_argument, '*'))
    # (option_string, option_argument*)

2280

2281

class option_group(Part, Element):
    """Groups together one or more `option` elements, all synonyms.

    The text of the children is separated by a comma and a space.
    """
    child_text_separator: Final = ', '
    content_model: Final = ((option, '+'),)  # (option+)

2286

2287

class description(Part, Element):
    """Description of a command-line option.

    Content: one or more body elements.
    """
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

2291

2292

class option_list_item(Part, Element):
    """Container for a pair of `option_group` and `description` elements.
    """
    # NOTE(review): the whitespace inside this literal may have been
    # collapsed in this copy of the file -- verify against the pristine
    # source before relying on the exact separator.
    child_text_separator: Final = ' '
    content_model: Final = ((option_group, '.'), (description, '.'))
    # (option_group, description)

2299

2300

class option_list(Sequential, Element):
    """Two-column list of command-line options and descriptions.

    Content: one or more `option_list_item` elements.
    """
    content_model: Final = ((option_list_item, '+'),)  # (option_list_item+)

2304

2305

2306# Pre-formatted text blocks

2307# -------------------------

2308

class literal_block(General, FixedTextElement):
    """General element ``literal_block`` (a `FixedTextElement`)."""


class doctest_block(General, FixedTextElement):
    """General element ``doctest_block`` (a `FixedTextElement`)."""

2311

2312

class math_block(General, FixedTextElement, PureTextElement):
    """Mathematical notation (display formula).

    A general element that is both `FixedTextElement` and
    `PureTextElement`.
    """

2315

2316

class line(Part, TextElement):
    """Single line of text in a `line_block`."""
    # Class-level default.
    # NOTE(review): presumably overridden per instance elsewhere -- confirm.
    indent: str | None = None

2320

2321

class line_block(General, Element):
    """Sequence of lines and nested line blocks.
    """
    # recursive content model: (line | line_block)+


# The content model refers to `line_block` itself and is therefore
# assigned after the class statement.
line_block.content_model = (((line, line_block), '+'),)

2329

2330

2331# Admonitions

2332# -----------

2333# distinctive and self-contained notices

2334

class attention(Admonition, Element):
    """Admonition element ``attention``."""


class caution(Admonition, Element):
    """Admonition element ``caution``."""


class danger(Admonition, Element):
    """Admonition element ``danger``."""


class error(Admonition, Element):
    """Admonition element ``error``."""


class important(Admonition, Element):
    """Admonition element ``important``."""


class note(Admonition, Element):
    """Admonition element ``note``."""


class tip(Admonition, Element):
    """Admonition element ``tip``."""


class hint(Admonition, Element):
    """Admonition element ``hint``."""


class warning(Admonition, Element):
    """Admonition element ``warning``."""

2344

2345

class admonition(Admonition, Element):
    """Admonition with a `title` followed by one or more body elements."""
    content_model: Final = ((title, '.'), (Body, '+'))
    # (title, (%body.elements;)+)

2349

2350

2351# Footnote and citation

2352# ---------------------

2353

class label(Part, PureTextElement):
    """Visible identifier for footnotes and citations.

    First child of `footnote` and `citation` elements (see their
    content models).
    """

2356

2357

class footnote(General, BackLinkable, Element, Labeled, Targetable):
    """Labelled note providing additional context (footnote or endnote).

    Content: an optional `label` followed by one or more body elements.
    Accepts the additional attributes "auto" and "backrefs".
    """
    valid_attributes: Final = Element.valid_attributes + ('auto', 'backrefs')
    content_model: Final = ((label, '?'), (Body, '+'))
    # (label?, (%body.elements;)+)
    # The label will become required in Docutils 1.0.

2364

2365

class citation(General, BackLinkable, Element, Labeled, Targetable):
    """General element: a `label` followed by one or more body elements."""
    content_model: Final = ((label, '.'), (Body, '+'))
    # (label, (%body.elements;)+)

2369

2370

2371# Graphical elements

2372# ------------------

2373

class image(General, Inline, Element):
    """Reference to an image resource.

    May be body element or inline element.

    Accepts the additional attributes "uri", "alt", "align", "height",
    "width", "scale", and "loading".
    """
    valid_attributes: Final = Element.valid_attributes + (
        'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading')

    def astext(self) -> str:
        """Return the "alt" attribute (default: the empty string)."""
        return self.get('alt', '')

2384

2385

class caption(Part, TextElement):
    """Text element ``caption``, optional part of a `figure`."""

2387

2388

class legend(Part, Element):
    """A wrapper for text accompanying a `figure` that is not the caption.

    Content: one or more body elements.
    """
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

2392

2393

class figure(General, Element):
    """A formal figure, generally an illustration, with a title.

    Content (as implemented): one `image` or `reference`, followed by
    an optional `caption` and an optional `legend`.
    Accepts the additional attributes "align" and "width".
    """
    valid_attributes: Final = Element.valid_attributes + ('align', 'width')
    content_model: Final = (((image, reference), '.'),
                            (caption, '?'),
                            (legend, '?'),
                            )
    # (image, ((caption, legend?) | legend))
    # TODO: According to the DTD, a caption or legend is required
    # but rST allows "bare" figures which are formatted differently from
    # images (floating in LaTeX, nested in a <figure> in HTML). [bugs: #489]

2405

2406

2407# Tables

2408# ------

2409

class entry(Part, Element):
    """An entry in a `row` (a table cell).

    Content: zero or more body elements.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colsep', 'morecols',
        'morerows', 'namest', 'nameend', 'rowsep', 'valign')
    content_model: Final = ((Body, '*'),)
    # %tbl.entry.mdl -> (%body.elements;)*

2417

2418

class row(Part, Element):
    """Row of table cells.

    Content: one or more `entry` elements.
    Accepts the additional attributes "rowsep" and "valign".
    """
    valid_attributes: Final = Element.valid_attributes + ('rowsep', 'valign')
    content_model: Final = ((entry, '+'),)  # (%tbl.row.mdl;) -> entry+

2423

2424

class colspec(Part, Element):
    """Specifications for a column in a `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colnum',
        'colsep', 'colwidth', 'rowsep', 'stub')

    def propwidth(self) -> int|float:
        """Return the "colwidth__" attribute as number. Default 1.

        The attribute value (or an empty string, if the attribute is
        missing) is converted by `validate_colwidth()`.

        Raise ValueError if "colwidth" is zero, negative, or a *fixed value*.

        Provisional.

        __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
        """
        # Move current implementation of validate_colwidth() here
        # in Docutils 1.0
        colwidth = self.get('colwidth', '')
        return validate_colwidth(colwidth)

2443

2444

class thead(Part, Element):
    """Row(s) that form the head of a `tgroup`.

    Content: one or more `row` elements.
    Accepts the additional attribute "valign".
    """
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    content_model: Final = ((row, '+'),)  # (row+)

2449

2450

class tbody(Part, Element):
    """Body of a `tgroup`.

    Content: one or more `row` elements.
    Accepts the additional attribute "valign".
    """
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    content_model: Final = ((row, '+'),)  # (row+)

2455

2456

class tgroup(Part, Element):
    """A portion of a table. Most tables have just one `tgroup`.

    Content: any number of `colspec` elements, an optional `thead`, and
    one `tbody`.
    Accepts the additional attributes "align", "cols", "colsep", and
    "rowsep".
    """
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'cols', 'colsep', 'rowsep')
    content_model: Final = ((colspec, '*'), (thead, '?'), (tbody, '.'))
    # (colspec*, thead?, tbody)

2463

2464

class table(General, Element):
    """A data arrangement with rows and columns.

    Content: an optional `title` followed by one or more `tgroup`
    elements.
    Accepts the additional attributes "align", "colsep", "frame",
    "pgwide", "rowsep", and "width".
    """
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width')
    content_model: Final = ((title, '?'), (tgroup, '+'))
    # (title?, tgroup+)

2471

2472

2473# Special purpose elements

2474# ------------------------

2475# Body elements for internal use or special requests.

2476

class comment(Invisible, FixedTextElement, PureTextElement):
    """Author notes, hidden from the output.

    An `Invisible` element that is both `FixedTextElement` and
    `PureTextElement`.
    """

2479

2480

class substitution_definition(Invisible, TextElement):
    """Invisible text element ``substitution_definition``.

    Accepts the additional attributes "ltrim" and "rtrim".
    """
    valid_attributes: Final = Element.valid_attributes + ('ltrim', 'rtrim')

2483

2484

class target(Invisible, Inline, TextElement, Targetable):
    """Invisible, targetable inline text element ``target``.

    Accepts the additional attributes "anonymous", "refid", "refname",
    and "refuri".
    """
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'refid', 'refname', 'refuri')

2488

2489

class system_message(Special, BackLinkable, PreBibliographic, Element):
    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """
    valid_attributes: Final = BackLinkable.valid_attributes + (
        'level', 'line', 'type')
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

    def __init__(self,
                 message: str | None = None,
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize the element.

        A non-empty `message` is wrapped in a `paragraph` that becomes
        the first child.  The keyword argument "rawsource" is removed
        from `attributes` and used as raw source (default: '').
        """
        rawsource = attributes.pop('rawsource', '')
        if message:
            children = (paragraph('', message), *children)
        try:
            Element.__init__(self, rawsource, *children, **attributes)
        except:  # NoQA: E722 (catchall)
            # Debugging aid: show the offending children, then re-raise.
            print('system_message: children=%r' % (children,))
            raise

    def astext(self) -> str:
        """Return "<source>:<line>: (<type>/<level>) <text content>".

        "line" is optional (default: ''); the attributes "source",
        "type", and "level" must be set.
        """
        line = self.get('line', '')
        return (f'{self["source"]}:{line}: '
                f'({self["type"]}/{self["level"]}) {Element.astext(self)}')

2520

2521

class pending(Invisible, Element):
    """
    Placeholder for pending operations.

    The "pending" element is used to encapsulate a pending operation: the
    operation (transform), the point at which to apply it, and any data it
    requires. Only the pending operation's location within the document is
    stored in the public document tree (by the "pending" object itself); the
    operation and its data are stored in the "pending" object's internal
    instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document. The easiest way to specify where to put it is from within the
    document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire document
    has been parsed and possibly transformed to some extent. So the directive
    code leaves a placeholder behind that will trigger the second phase of its
    processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all pending
    transforms.
    """

    def __init__(self,
                 transform: Transform,
                 details: Mapping[str, Any] | None = None,
                 rawsource: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform: Transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        self.details: Mapping[str, Any] = details or {}
        """Detail data (dictionary) required by the pending operation."""

    # NOTE(review): the whitespace inside the string literals below
    # (default `indent`, the '.transform'/'.details' labels, the final
    # line prefix) may have been collapsed in this copy of the file --
    # verify against the pristine source.
    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """Return the element's `pformat()` output plus internal attributes.

        The lines appended after the inherited output show the
        transform (module and class name) and the items of
        `self.details`, sorted by key.  `Node` values, and lists whose
        first item is a `Node`, are shown via their own `pformat()`
        output; all other values via `repr()`.
        """
        internals = ['.. internal attributes:',
                     ' .transform: %s.%s' % (self.transform.__module__,
                                             self.transform.__name__),
                     ' .details:']
        details = sorted(self.details.items())
        for key, value in details:
            if isinstance(value, Node):
                internals.append('%7s%s:' % ('', key))
                internals.extend(['%9s%s' % ('', line)
                                  for line in value.pformat().splitlines()])
            elif (value
                  and isinstance(value, list)
                  and isinstance(value[0], Node)):
                internals.append('%7s%s:' % ('', key))
                for v in value:
                    internals.extend(['%9s%s' % ('', line)
                                      for line in v.pformat().splitlines()])
            else:
                internals.append('%7s%s: %r' % ('', key, value))
        return (Element.pformat(self, indent, level)
                + ''.join((' %s%s\n' % (indent * level, line))
                          for line in internals))

    def copy(self) -> Self:
        """Return a new instance without the children.

        Transform, raw source, and attributes are passed to the
        constructor; the `details` mapping is passed as is (not copied).
        `_document`, `source`, and `line` are carried over.
        """
        obj = self.__class__(self.transform, self.details, self.rawsource,
                             **self.attributes)
        obj._document = self._document
        obj.source = self.source
        obj.line = self.line
        return obj

2598

2599

class raw(Special, Inline, PreBibliographic,
          FixedTextElement, PureTextElement):
    """Raw data that is to be passed untouched to the Writer.

    Can be used as Body element or Inline element.

    Accepts the additional attributes "format" and "xml:space".
    """
    valid_attributes: Final = Element.valid_attributes + (
        'format', 'xml:space')

2608

2609

2610# Inline Elements

2611# ===============

2612

class abbreviation(Inline, TextElement):
    """Inline text element ``abbreviation``."""


class acronym(Inline, TextElement):
    """Inline text element ``acronym``."""


class emphasis(Inline, TextElement):
    """Inline text element ``emphasis``."""


class generated(Inline, TextElement):
    """Inline text element ``generated``."""


class inline(Inline, TextElement):
    """Inline text element ``inline``."""


class literal(Inline, TextElement):
    """Inline text element ``literal``."""


class strong(Inline, TextElement):
    """Inline text element ``strong``."""


class subscript(Inline, TextElement):
    """Inline text element ``subscript``."""


class superscript(Inline, TextElement):
    """Inline text element ``superscript``."""


class title_reference(Inline, TextElement):
    """Inline text element ``title_reference``."""

2623

2624

class footnote_reference(Inline, Referential, PureTextElement):
    """Inline, referential pure-text element ``footnote_reference``.

    Accepts the additional attributes "auto", "refid", and "refname".
    """
    valid_attributes: Final = Element.valid_attributes + (
        'auto', 'refid', 'refname')

2628

2629

class citation_reference(Inline, Referential, PureTextElement):
    """Inline, referential pure-text element ``citation_reference``.

    Accepts the additional attributes "refid" and "refname".
    """
    valid_attributes: Final = Element.valid_attributes + ('refid', 'refname')

2632

2633

class substitution_reference(Inline, TextElement):
    """Inline text element ``substitution_reference``.

    Accepts the additional attribute "refname".
    """
    valid_attributes: Final = Element.valid_attributes + ('refname',)

2636

2637

class math(Inline, PureTextElement):
    """Mathematical notation in running text.

    The inline counterpart of the display formula `math_block`.
    """

2640

2641

class problematic(Inline, TextElement):
    """Inline text element ``problematic``.

    Accepts the additional attributes "refid", "refname", and "refuri".
    """
    valid_attributes: Final = Element.valid_attributes + (
        'refid', 'refname', 'refuri')

2645

2646

2647# ========================================

2648# Auxiliary Classes, Functions, and Data

2649# ========================================

2650

# The names are kept in one whitespace-separated string (grouped by
# initial letter) that is split into a list when the module is loaded.
node_class_names: Sequence[str] = """
    Text
    abbreviation acronym address admonition attention attribution author
        authors
    block_quote bullet_list
    caption caution citation citation_reference classifier colspec comment
        compound contact container copyright
    danger date decoration definition definition_list definition_list_item
        description docinfo doctest_block document
    emphasis entry enumerated_list error
    field field_body field_list field_name figure footer
        footnote footnote_reference
    generated
    header hint
    image important inline
    label legend line line_block list_item literal literal_block
    math math_block meta
    note
    option option_argument option_group option_list option_list_item
        option_string organization
    paragraph pending problematic
    raw reference revision row rubric
    section sidebar status strong subscript substitution_definition
        substitution_reference subtitle superscript system_message
    table target tbody term tgroup thead tip title title_reference topic
        transition
    version
    warning""".split()
"""A list of names of all concrete Node subclasses."""

2680

2681

class NodeVisitor:
    """
    "Visitor" pattern [GoF95]_ abstract superclass implementation for
    document tree traversals.

    Each node class has corresponding methods, doing nothing by
    default; override individual methods for specific and useful
    behaviour.  The `dispatch_visit()` method is called by
    `Node.walk()` upon entering a node.  `Node.walkabout()` also calls
    the `dispatch_departure()` method before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.

    This is a base class for visitors whose ``visit_...`` & ``depart_...``
    methods must be implemented for *all* compulsory node types encountered
    (such as for `docutils.writers.Writer` subclasses).
    Unimplemented methods will raise exceptions (except for optional nodes).

    For sparse traversals, where only certain node types are of interest, use
    subclass `SparseNodeVisitor` instead.  When (mostly or entirely) uniform
    processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional: ClassVar[tuple[str, ...]] = ('meta',)
    """
    Tuple containing node class names (as strings).

    No exception will be raised if writers do not implement visit
    or departure functions for these node classes.

    Used to ensure transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document: document, /) -> None:
        """Store `document` (positional-only) as `self.document`."""
        self.document: document = document

    def dispatch_visit(self, node) -> None:
        """
        Call self."``visit_`` + node class name" with `node` as
        parameter.  If the ``visit_...`` method does not exist, call
        self.unknown_visit.

        The call is announced with ``self.document.reporter.debug()``;
        the result of the called method is returned.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, f'visit_{class_name}', self.unknown_visit)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_visit calling '
            f'{handler.__name__} for {class_name}')
        return handler(node)

    def dispatch_departure(self, node) -> None:
        """
        Call self."``depart_`` + node class name" with `node` as
        parameter.  If the ``depart_...`` method does not exist, call
        self.unknown_departure.

        The call is announced with ``self.document.reporter.debug()``;
        the result of the called method is returned.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, f'depart_{class_name}',
                          self.unknown_departure)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_departure calling '
            f'{handler.__name__} for {class_name}')
        return handler(node)

    def unknown_visit(self, node) -> None:
        """
        Called when entering unknown `Node` types.

        Raise `NotImplementedError` unless overridden.  Node classes
        listed in `self.optional` are tolerated if the "strict_visitor"
        setting is false.
        """
        class_name = node.__class__.__name__
        if (self.document.settings.strict_visitor
                or class_name not in self.optional):
            raise NotImplementedError(
                f'{self.__class__} visiting unknown node type: {class_name}')

    def unknown_departure(self, node) -> None:
        """
        Called before exiting unknown `Node` types.

        Raise `NotImplementedError` unless overridden.  Node classes
        listed in `self.optional` are tolerated if the "strict_visitor"
        setting is false.
        """
        class_name = node.__class__.__name__
        if (self.document.settings.strict_visitor
                or class_name not in self.optional):
            raise NotImplementedError(
                f'{self.__class__} departing unknown node type: {class_name}')

2772

2773

class SparseNodeVisitor(NodeVisitor):
    """
    Base class for sparse traversals, where only certain node types are of
    interest.

    The ``visit_...`` and ``depart_...`` methods of this class are no-ops
    (they are assigned dynamically by `_add_node_class_names()`), so
    subclasses only need to override the methods for the node types they
    care about.

    When ``visit_...`` & ``depart_...`` methods should be implemented for
    *all* node types (such as for `docutils.writers.Writer` subclasses),
    subclass `NodeVisitor` instead.
    """

2781

2782

class GenericNodeVisitor(NodeVisitor):
    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`, and
    each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`. `default_visit()` (and `default_departure()`) must
    be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only. Define semi-generic visitors by overriding
    individual ``visit_...()`` (and ``depart_...()``) methods also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.

    The generic ``visit_...`` and ``depart_...`` methods are assigned
    dynamically by `_add_node_class_names()`.
    """

    def default_visit(self, node):
        """Handle entering `node`; override for generic, uniform traversals.

        The base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError

    def default_departure(self, node):
        """Handle leaving `node`; override for generic, uniform traversals.

        The base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError

2807

2808

def _call_default_visit(self: GenericNodeVisitor, node) -> None:
    """Forward to ``self.default_visit(node)``.

    Assigned as ``visit_...`` method to `GenericNodeVisitor` by
    `_add_node_class_names()`.
    """
    self.default_visit(node)

2811

2812

def _call_default_departure(self: GenericNodeVisitor, node) -> None:
    """Forward to ``self.default_departure(node)``.

    Assigned as ``depart_...`` method to `GenericNodeVisitor` by
    `_add_node_class_names()`.
    """
    self.default_departure(node)

2815

2816

def _nop(self: SparseNodeVisitor, node) -> None:
    """Do nothing.

    Assigned as ``visit_...`` and ``depart_...`` method to
    `SparseNodeVisitor` by `_add_node_class_names()`.
    """
    pass

2819

2820

def _add_node_class_names(names) -> None:
    """Assign default visitor methods for every node class name in `names`.

    For each name, `GenericNodeVisitor` gets ``visit_<name>`` and
    ``depart_<name>`` methods forwarding to `default_visit()` and
    `default_departure()`, and `SparseNodeVisitor` gets no-op methods of
    the same names.  Saves typing the methods out one by one.
    """
    defaults = (
        (GenericNodeVisitor, "visit_", _call_default_visit),
        (GenericNodeVisitor, "depart_", _call_default_departure),
        (SparseNodeVisitor, 'visit_', _nop),
        (SparseNodeVisitor, 'depart_', _nop),
    )
    for class_name in names:
        for visitor_class, prefix, handler in defaults:
            setattr(visitor_class, prefix + class_name, handler)

2828

2829

# Install the default ``visit_...``/``depart_...`` methods on
# `GenericNodeVisitor` and `SparseNodeVisitor` at import time.
# NOTE(review): `node_class_names` is defined elsewhere in this module;
# presumably it lists the names of all element classes -- confirm.
_add_node_class_names(node_class_names)

2831

2832

class TreeCopyVisitor(GenericNodeVisitor):
    """
    Make a complete copy of a tree or branch, including element attributes.

    Each visited node is copied with ``node.copy()`` and appended to the
    copy of its parent.  Call `get_tree_copy()` after the traversal to
    retrieve the copy of the traversal's root node.
    """

    def __init__(self, document: document) -> None:
        super().__init__(document)
        # Acting parent: the container that receives the next node copy.
        # Starts out as a plain list collecting the top-level copy.
        self.parent: list = []
        # Acting parents of the enclosing levels, innermost last.
        self.parent_stack: list[list] = []

    def get_tree_copy(self):
        """Return the first object collected at the top level."""
        top_level_copy = self.parent[0]
        return top_level_copy

    def default_visit(self, node) -> None:
        """Copy the current node, and make it the new acting parent."""
        enclosing = self.parent
        clone = node.copy()
        enclosing.append(clone)
        # Remember where to continue once this node has been departed.
        self.parent_stack.append(enclosing)
        self.parent = clone

    def default_departure(self, node) -> None:
        """Restore the previous acting parent."""
        self.parent = self.parent_stack.pop()

2856

2857

2858# Custom Exceptions

2859# =================

2860

class ValidationError(ValueError):
    """Invalid Docutils Document Tree Element.

    :msg: the error message (passed on to `ValueError`).
    :problematic_element: the element the error refers to, or ``None``
        if no element is given.  Stored in the attribute of the same name.
    """

    def __init__(self, msg: str,
                 problematic_element: Element | None = None) -> None:
        super().__init__(msg)
        self.problematic_element = problematic_element

2866

2867

class TreePruningException(Exception):
    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Raise subclasses from within ``visit_...`` or ``depart_...`` methods
    called from `Node.walk()` and `Node.walkabout()` tree traversals to prune
    the tree traversed.  See the docstrings of the individual subclasses
    for the effect of each exception.
    """

2876

2877

class SkipChildren(TreePruningException):
    """
    Do not visit any children of the current node.

    The current node's siblings and its ``depart_...`` method are not
    affected.
    """

2883

2884

class SkipSiblings(TreePruningException):
    """
    Do not visit any more siblings (to the right) of the current node.

    The current node's children and its ``depart_...`` method are not
    affected.
    """

2890

2891

class SkipNode(TreePruningException):
    """
    Skip the rest of the current node.

    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """

2897

2898

class SkipDeparture(TreePruningException):
    """
    Do not call the current node's ``depart_...`` method.

    The current node's children and siblings are not affected.
    """

2904

2905

class NodeFound(TreePruningException):
    """
    Raise to indicate that the target of a search has been found.

    This exception must be caught by the client; it is not caught by the
    traversal code.
    """

2912

2913

class StopTraversal(TreePruningException):
    """
    Stop the traversal altogether.

    The current node's ``depart_...`` method is not affected.  The parent
    nodes' ``depart_...`` methods are also called as usual.  No other nodes
    are visited.  This is an alternative to `NodeFound` that does not cause
    exception handling to trickle up to the caller.
    """

2922

2923

2924# definition moved here from `utils` to avoid circular import dependency

def unescape(text: str,
             restore_backslashes: bool = False,
             respect_whitespace: bool = False,
             ) -> str:
    """
    Return `text` with null characters restored to backslashes or removed.

    If `restore_backslashes` is true, every null character is replaced
    by a backslash.  Otherwise, null characters are removed -- together
    with the space or newline they escape, if any.
    """
    # `respect_whitespace` is ignored (since introduction 2016-12-16)
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Drop escaped whitespace first, so that the final pass only
    # removes the remaining (lone) null characters.
    for escaped in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(escaped, '')
    return text

2940

2941

def make_id(string: str) -> str:
    """
    Return an identifier derived from `string`.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [¡-ÿ]
          escape      {unicode}|\\[ -~¡-ÿ]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters.  They should be replaced with hyphens ("-").  Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: https://www.w3.org/TR/html401
    .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1
    """
    ident = string.lower()
    # Replace characters listed in the translation tables by ASCII
    # letters (digraphs first).
    for table in (_non_id_translate_digraphs, _non_id_translate):
        ident = ident.translate(table)
    # Decompose the remaining characters and drop everything that is
    # not ASCII.
    decomposed = unicodedata.normalize('NFKD', ident)
    ident = decomposed.encode('ascii', 'ignore').decode('ascii')
    # Shrink runs of whitespace, then replace every run of non-id
    # characters by a single hyphen.
    collapsed = ' '.join(ident.split())
    ident = _non_id_chars.sub('-', collapsed)
    # Strip leading hyphens and digits as well as trailing hyphens.
    ident = _non_id_at_ends.sub('', ident)
    return str(ident)

2988

2989

# Patterns and translation tables used by `make_id()`:

# Runs of characters other than lowercase ASCII letters and digits.
_non_id_chars: re.Pattern[str] = re.compile('[^a-z0-9]+')
# Leading hyphens and digits, or trailing hyphens.
_non_id_at_ends: re.Pattern[str] = re.compile('^[-0-9]+|-+$')
# Mapping of code points to a single replacement letter
# (for use with `str.translate()`).
_non_id_translate: dict[int, str] = {
    0x00f8: 'o',       # o with stroke
    0x0111: 'd',       # d with stroke
    0x0127: 'h',       # h with stroke
    0x0131: 'i',       # dotless i
    0x0142: 'l',       # l with stroke
    0x0167: 't',       # t with stroke
    0x0180: 'b',       # b with stroke
    0x0183: 'b',       # b with topbar
    0x0188: 'c',       # c with hook
    0x018c: 'd',       # d with topbar
    0x0192: 'f',       # f with hook
    0x0199: 'k',       # k with hook
    0x019a: 'l',       # l with bar
    0x019e: 'n',       # n with long right leg
    0x01a5: 'p',       # p with hook
    0x01ab: 't',       # t with palatal hook
    0x01ad: 't',       # t with hook
    0x01b4: 'y',       # y with hook
    0x01b6: 'z',       # z with stroke
    0x01e5: 'g',       # g with stroke
    0x0225: 'z',       # z with hook
    0x0234: 'l',       # l with curl
    0x0235: 'n',       # n with curl
    0x0236: 't',       # t with curl
    0x0237: 'j',       # dotless j
    0x023c: 'c',       # c with stroke
    0x023f: 's',       # s with swash tail
    0x0240: 'z',       # z with swash tail
    0x0247: 'e',       # e with stroke
    0x0249: 'j',       # j with stroke
    0x024b: 'q',       # q with hook tail
    0x024d: 'r',       # r with stroke
    0x024f: 'y',       # y with stroke
}
# Mapping of code points to a two-letter replacement
# (applied by `make_id()` before `_non_id_translate`).
_non_id_translate_digraphs: dict[int, str] = {
    0x00df: 'sz',      # ligature sz
    0x00e6: 'ae',      # ae
    0x0153: 'oe',      # ligature oe
    0x0238: 'db',      # db digraph
    0x0239: 'qp',      # qp digraph
}

3034

3035

def dupname(node: Element, name: str) -> None:
    """Mark `name` as duplicate name of `node`.

    Append `name` to the node's "dupnames", remove it from its "names",
    and flag the node as referenced.
    """
    duplicates, current = node['dupnames'], node['names']
    duplicates.append(name)
    current.remove(name)
    # Assume that `node` is referenced, even though it isn't;
    # we don't want to throw unnecessary system_messages.
    node.referenced = True

3042

3043

def fully_normalize_name(name: str) -> str:
    """Return `name` in lowercase with whitespace runs collapsed.

    Leading and trailing whitespace is removed, internal runs of
    whitespace are replaced by a single space.
    """
    words = name.lower().split()
    return ' '.join(words)

3047

3048

def whitespace_normalize_name(name: str) -> str:
    """Return `name` with whitespace runs collapsed (case is preserved).

    Leading and trailing whitespace is removed, internal runs of
    whitespace are replaced by a single space.
    """
    words = name.split()
    return ' '.join(words)

3052

3053

def serial_escape(value: str) -> str:
    """Escape string values that are elements of a list, for serialization.

    Backslashes are doubled and spaces are preceded by a backslash.
    """
    # Both characters are handled in one pass; an escaping backslash
    # inserted before a space is therefore never doubled.
    escapes = {ord('\\'): r'\\', ord(' '): r'\ '}
    return value.translate(escapes)

3057

3058

def split_name_list(s: str) -> list[str]:
    r"""Split a string at non-escaped spaces.

    Backslashes escape internal spaces and literal backslashes
    (cf. `serial_escape()`).
    Return list of "names" (after removing escaping backslashes).

    Empty items (resulting from an empty string or from leading, trailing,
    or repeated separator spaces) are dropped, so that the serialization
    of an empty list is converted back to an empty list.

    >>> split_name_list(r'a\ n\ame two\\ n\\ames')
    ['a name', 'two\\', 'n\\ames']
    >>> split_name_list('')
    []

    Provisional.
    """
    s = s.replace('\\', '\x00')         # escape with NULL char
    s = s.replace('\x00\x00', '\\')     # unescape backslashes
    s = s.replace('\x00 ', '\x00\x00')  # escaped spaces -> NULL NULL
    # Restore internal spaces, drop other escaping characters.
    # The emptiness test is done *before* unescaping so that only items
    # without any content between separators are skipped.
    return [item.replace('\x00\x00', ' ').replace('\x00', '')
            for item in s.split(' ') if item]

3077

3078

def pseudo_quoteattr(value: str) -> str:
    """Return `value` in double quotes (attribute quoting for pseudo-xml).

    No characters in `value` are escaped.
    """
    return f'"{value}"'

3082

3083

def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'
                  ) -> tuple[int|float, str]:
    """Parse a measure__, return value + unit.

    `measure` is a number (optional minus sign, digits, and decimal points)
    optionally followed by spaces and a unit.

    `unit_pattern` is a regular expression describing recognized units.
    The default is suited for (but not limited to) CSS3 units and SI units.
    It matches runs of ASCII letters or Greek mu, a single percent sign,
    or no unit.

    Return a tuple ``(value, unit)``; the value is an `int` if the number
    can be converted with `int()`, else a `float`.

    Raise `ValueError` if `measure` does not match the expected format or
    the number cannot be converted.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure

    Provisional.
    """
    match = re.fullmatch(f'(-?[0-9.]+) *({unit_pattern})', measure)
    # Test for a failed match explicitly instead of provoking an
    # `AttributeError` that would show up in the traceback.
    if match is None:
        raise ValueError(f'"{measure}" is no valid measure.')
    number, unit = match.group(1), match.group(2)
    try:
        value = int(number)
    except ValueError:
        try:
            value = float(number)
        except ValueError:
            # e.g. "1.2.3" or "."; hide the internal conversion errors.
            raise ValueError(f'"{measure}" is no valid measure.') from None
    return value, unit

3107

3108

3109# Methods to validate `Element attribute`__ values.

3110

3111# Ensure the expected Python `data type`__, normalize, and check for

3112# restrictions.

3113#

3114# The methods can be used to convert `str` values (eg. from an XML

3115# representation) or to validate an existing document tree or node.

3116#

3117# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,

3118# and the `attribute_validating_functions` mapping below.

3119#

3120# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference

3121# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types

3122

def create_keyword_validator(*keywords: str) -> Callable[[str], str]:
    """
    Return a validating function for the given `keywords`.

    The returned function returns its argument unchanged if it is one of
    the `keywords` and raises a `ValueError` otherwise.

    Provisional.
    """
    def validate_keywords(value: str) -> str:
        if value in keywords:
            return value
        allowed = '", "'.join(keywords)
        raise ValueError(f'"{value}" is not one of "{allowed}".')

    return validate_keywords

3135

3136

def validate_identifier(value: str) -> str:
    """
    Validate identifier key or class name.

    A valid identifier is left unchanged by `make_id()`.
    Return `value` if it is valid, raise `ValueError` otherwise.

    Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type

    Provisional.
    """
    if make_id(value) == value:
        return value
    raise ValueError(f'"{value}" is no valid id or class name.')

3150

3151

def validate_identifier_list(value: str | list[str]) -> list[str]:
    """
    Validate a (space-separated) list of ids or class names.

    `value` may be a `list` or a `str` with space separated
    ids or class names (cf. `validate_identifier()`).
    Return the list of identifiers (a `str` is split at whitespace);
    raise `ValueError` if a list item is no valid identifier.

    Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type

    Provisional.
    """
    identifiers = value.split() if isinstance(value, str) else value
    for identifier in identifiers:
        validate_identifier(identifier)
    return identifiers

3172

3173

def validate_measure(measure: str) -> str:
    """
    Validate a measure__ (number + optional unit). Return normalized `str`.

    The normalized form is the number immediately followed by the unit
    (without separating space).

    See `parse_measure()` for a function returning a "number + unit" tuple.

    The unit may be a run of ASCII letters or Greek mu, a single percent sign,
    or the empty string. Case is preserved.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
    """
    number, unit = parse_measure(measure)
    return str(number) + unit

3189

3190

def validate_colwidth(measure: str|int|float) -> int|float:
    """Validate the "colwidth__" attribute.

    Numbers are used as-is, the strings "*" and "" stand for 1,
    other strings are parsed as a number with optional unit "*".
    Return the number; raise `ValueError` if `measure` cannot be
    parsed or the number is not positive.

    Provisional:
      `measure` must be a `str` and will be returned as normalized `str`
      (with unit "*" for proportional values) in Docutils 1.0.

      The default unit will change to "pt" in Docutils 2.0.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
    """
    if isinstance(measure, (int, float)):
        width = measure
    elif measure in ('*', ''):  # short for '1*'
        width = 1
    else:
        try:
            width, _ = parse_measure(measure, unit_pattern='[*]?')
        except ValueError:
            # use an invalid width to trigger the error report below
            width = -1
    if width <= 0:
        raise ValueError(f'"{measure}" is no proportional measure.')
    return width

3214

3215

def validate_NMTOKEN(value: str) -> str:
    """
    Validate a "name token": a `str` of ASCII letters, digits, and [-._].

    Return `value` unchanged; raise `ValueError` if it is empty or
    contains other characters.

    Provisional.
    """
    if re.fullmatch('[-._A-Za-z0-9]+', value):
        return value
    raise ValueError(f'"{value}" is no NMTOKEN.')

3225

3226

def validate_NMTOKENS(value: str | list[str]) -> list[str]:
    """
    Validate a list of "name tokens".

    `value` may be a `list` or a `str` with whitespace separated tokens
    (cf. `validate_NMTOKEN()`).  Return the list of tokens;
    raise `ValueError` if a list item is no valid name token.

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_NMTOKEN(token)
    return tokens

3238

3239

def validate_refname_list(value: str | list[str]) -> list[str]:
    """
    Validate a list of `reference names`__.

    Reference names may contain all characters;
    whitespace is normalized (cf. `whitespace_normalize_name()`).

    `value` may be either a `list` of names or a `str` with
    space separated names (with internal spaces backslash escaped
    and literal backslashes doubled cf. `serial_escape()`).

    Return a list of whitespace-normalized, unescaped reference names.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name
    """
    names = split_name_list(value) if isinstance(value, str) else value
    return [whitespace_normalize_name(refname) for refname in names]

3260

3261

def validate_yesorno(value: str | int | bool) -> bool:
    """Validate a `%yesorno`__ (flag) value.

    The string literal "0" evaluates to ``False``, all other
    values are converted with `bool()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#yesorno
    """
    return False if value == "0" else bool(value)

3273

3274

# Keys are attribute names, values are callables that take an attribute
# value and return the validated (and possibly converted) value.
# The ``validate_...`` functions and the functions returned by
# `create_keyword_validator()` raise `ValueError` for invalid values.
ATTRIBUTE_VALIDATORS: dict[str, Callable[[str], Any]] = {
    'alt': str,  # CDATA
    'align': str,
    'anonymous': validate_yesorno,
    'auto': str,  # CDATA (only '1' or '*' are used in rST)
    'backrefs': validate_identifier_list,
    'bullet': str,  # CDATA (only '-', '+', or '*' are used in rST)
    'classes': validate_identifier_list,
    'char': str,  # from Exchange Table Model (CALS), currently ignored
    'charoff': validate_NMTOKEN,  # from CALS, currently ignored
    'colname': validate_NMTOKEN,  # from CALS, currently ignored
    'colnum': int,  # from CALS, currently ignored
    'cols': int,  # from CALS: "NMTOKEN, […] must be an integer > 0".
    'colsep': validate_yesorno,
    'colwidth': validate_colwidth,  # see docstring for pending changes
    'content': str,  # <meta>
    'delimiter': str,
    'dir': create_keyword_validator('ltr', 'rtl', 'auto'),  # <meta>
    'dupnames': validate_refname_list,
    'enumtype': create_keyword_validator('arabic', 'loweralpha', 'lowerroman',
                                         'upperalpha', 'upperroman'),
    'format': str,  # CDATA (space separated format names)
    'frame': create_keyword_validator('top', 'bottom', 'topbot', 'all',
                                      'sides', 'none'),  # from CALS, ignored
    'height': validate_measure,
    'http-equiv': str,  # <meta>
    'ids': validate_identifier_list,
    'lang': str,  # <meta>
    'level': int,
    'line': int,
    'ltrim': validate_yesorno,
    'loading': create_keyword_validator('embed', 'link', 'lazy'),
    'media': str,  # <meta>
    'morecols': int,
    'morerows': int,
    'name': whitespace_normalize_name,  # in <reference> (deprecated)
    # 'name': node_attributes.validate_NMTOKEN,  # in <meta>
    'names': validate_refname_list,
    'namest': validate_NMTOKEN,  # start of span, from CALS, currently ignored
    'nameend': validate_NMTOKEN,  # end of span, from CALS, currently ignored
    'pgwide': validate_yesorno,  # from CALS, currently ignored
    'prefix': str,
    'refid': validate_identifier,
    'refname': whitespace_normalize_name,
    'refuri': str,
    'rowsep': validate_yesorno,
    'rtrim': validate_yesorno,
    'scale': int,
    'scheme': str,
    'source': str,
    'start': int,
    'stub': validate_yesorno,
    'suffix': str,
    'title': str,
    'type': validate_NMTOKEN,
    'uri': str,
    'valign': create_keyword_validator('top', 'middle', 'bottom'),  # from CALS
    'width': validate_measure,
    'xml:space': create_keyword_validator('default', 'preserve'),
    }
"""
Mapping of `attribute names`__ to validating functions.

Provisional.

__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
"""