Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/docutils/nodes.py: 47%

1# $Id$

2# Author: David Goodger <goodger@python.org>

3# Maintainer: docutils-develop@lists.sourceforge.net

4# Copyright: This module has been placed in the public domain.

6"""

7Docutils document tree element class library.

9Classes in CamelCase are abstract base classes or auxiliary classes. The one

10exception is `Text`, for a text (PCDATA) node; uppercase is used to

11differentiate from element classes. Classes in lower_case_with_underscores

12are element classes, matching the XML element generic identifiers in the DTD_.

14The position of each node (the level at which it can occur) is significant and

15is represented by abstract base classes (`Root`, `Structural`, `Body`,

16`Inline`, etc.). Certain transformations will be easier because we can use

17``isinstance(node, base_class)`` to determine the position of the node in the

18hierarchy.

20.. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd

21"""

23from __future__ import annotations

25__docformat__ = 'reStructuredText'

27import os

28import re

29import sys

30import unicodedata

31import warnings

32from collections import Counter

33from typing import TYPE_CHECKING, overload

34# import xml.dom.minidom as dom # -> conditional import in Node.asdom()

35# and document.asdom()

37# import docutils.transforms # -> delayed import in document.__init__()

39if TYPE_CHECKING:

40 from collections.abc import (Callable, Iterable, Iterator,

41 Mapping, Sequence)

42 from types import ModuleType

43 from typing import Any, ClassVar, Final, Literal, Self, SupportsIndex

44 if sys.version_info[:2] >= (3, 12):

45 from typing import TypeAlias

46 else:

47 from typing_extensions import TypeAlias

49 from xml.dom import minidom

51 from docutils.frontend import Values

52 from docutils.transforms import Transformer, Transform

53 from docutils.utils import Reporter

55 _ContentModelCategory: TypeAlias = tuple['Element' | tuple['Element', ...]]

56 _ContentModelQuantifier = Literal['.', '?', '+', '*']

57 _ContentModelItem: TypeAlias = tuple[_ContentModelCategory,

58 _ContentModelQuantifier]

59 _ContentModelTuple: TypeAlias = tuple[_ContentModelItem, ...]

61 StrPath: TypeAlias = str | os.PathLike[str]

62 """File system path. No bytes!"""

64 _UpdateFun: TypeAlias = Callable[[str, Any, bool], None]

67# ==============================

68# Functional Node Base Classes

69# ==============================

class Node:
    """Abstract base class of nodes in a document tree."""

    parent: Element | None = None
    """Back-reference to the Node immediately containing this Node."""

    children: Sequence[Node] = ()
    """Sequence of child nodes.

    Override in subclass instances that are not terminal nodes.
    """

    source: StrPath | None = None
    """Path or description of the input source which generated this Node."""

    line: int | None = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    tagname: str  # defined in subclasses
    """The element generic identifier."""

    _document: document | None = None

    @property
    def document(self) -> document | None:
        """Return the `document` root node of the tree containing this Node.

        The value stored on this node wins; otherwise the lookup is
        delegated to the parent. Return None if neither is available.
        """
        try:
            return self._document or self.parent.document
        except AttributeError:
            # `self.parent` is None: there is no ancestor left to ask.
            return None

    @document.setter
    def document(self, value: document) -> None:
        self._document = value

    def __bool__(self) -> Literal[True]:
        """
        Node instances are always true, even if they're empty.  A node is more
        than a simple container.  Its boolean "truth" does not depend on
        having one or more subnodes in the doctree.

        Use `len()` to check node length.
        """
        return True

    def asdom(self,
              dom: ModuleType | None = None,
              ) -> minidom.Document | minidom.Element | minidom.Text:
        # TODO: minidom.Document is only returned by document.asdom()
        #       (which overwrites this base-class implementation)
        """Return a DOM **fragment** representation of this Node."""
        if dom is None:
            import xml.dom.minidom as dom
        domroot = dom.Document()
        return self._dom_node(domroot)

    def _dom_node(self, domroot: minidom.Document) -> minidom.Element:
        # Stub.  Override in subclasses.
        # The stub creates an *element* named after the class.
        return domroot.createElement(self.__class__.__name__)

    def shortrepr(self) -> str:
        # concise string representation for test and debugging purposes
        return repr(self)

    # NOTE(review): the `indent` default is reproduced as shown in this
    # listing (a single space); confirm it against the real module, the
    # listing may have collapsed whitespace.
    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """
        Return an indented pseudo-XML representation, for test purposes.

        Override in subclasses.
        """
        raise NotImplementedError

    def copy(self) -> Self:
        """Return a copy of self."""
        raise NotImplementedError

    def deepcopy(self) -> Self:
        """Return a deep copy of self (also copying children)."""
        raise NotImplementedError

    def astext(self) -> str:
        """Return a string representation of this Node."""
        raise NotImplementedError

    def setup_child(self, child: Node) -> None:
        """Make `self` the parent of `child` and pass on document info.

        If `self` belongs to a document, the child gets the same document
        and, where still unset, the document's current source and line.
        """
        child.parent = self
        # Resolve the property once; every access may walk up the tree.
        doc = self.document
        if doc:
            child.document = doc
            if child.source is None:
                child.source = doc.current_source
            if child.line is None:
                child.line = doc.current_line

    def walk(self, visitor: NodeVisitor) -> bool:
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (The `walkabout()` method is similar, except it also
        calls the `dispatch_departure()` method before exiting each
        node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit`` methods (and ``depart`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        stop = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return stop
            except SkipDeparture:  # not applicable; ignore
                pass
            children = self.children
            try:
                # Iterate over a copy so that visitors may modify the tree.
                for child in children[:]:
                    if child.walk(visitor):
                        stop = True
                        break
            except SkipSiblings:
                pass
        except StopTraversal:
            stop = True
        return stop

    def walkabout(self, visitor: NodeVisitor) -> bool:
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        call_depart = True
        stop = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                return stop
            except SkipDeparture:
                call_depart = False
            children = self.children
            try:
                # Iterate over a copy so that visitors may modify the tree.
                for child in children[:]:
                    if child.walkabout(visitor):
                        stop = True
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            stop = True
        if call_depart:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return stop

    def _fast_findall(self, cls: type) -> Iterator[Node]:
        """Return iterator that only supports instance checks."""
        if isinstance(self, cls):
            yield self
        for child in self.children:
            yield from child._fast_findall(cls)

    def _superfast_findall(self) -> Iterator[Node]:
        """Return iterator that doesn't check for a condition."""
        # This is different from ``iter(self)`` implemented via
        # __getitem__() and __len__() in the Element subclass,
        # which yields only the direct children.
        yield self
        for child in self.children:
            yield from child._superfast_findall()

    def findall(self,
                condition: Callable[[Node], bool] | type | None = None,
                include_self: bool = True,
                descend: bool = True,
                siblings: bool = False,
                ascend: bool = False,
                ) -> Iterator[Node]:
        """
        Return an iterator yielding nodes following `self`:

        * self (if `include_self` is true)
        * all descendants in tree traversal order (if `descend` is true)
        * the following siblings (if `siblings` is true) and their
          descendants (if also `descend` is true)
        * the following siblings of the parent (if `ascend` is true) and
          their descendants (if also `descend` is true), and so on.

        If `condition` is not None, the iterator yields only nodes
        for which ``condition(node)`` is true.  If `condition` is a
        type ``cls``, it is equivalent to a function consisting
        of ``return isinstance(node, cls)``.

        If `ascend` is true, assume `siblings` to be true as well.

        If the tree structure is modified during iteration, the result
        is undefined.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.findall() and
                    <strong>    <--- strong.findall() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then tuple(emphasis.findall()) equals ::

            (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

        and list(strong.findall(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        # Check for special argument combinations that allow using an
        # optimized version of findall()
        if include_self and descend and not siblings:
            if condition is None:
                yield from self._superfast_findall()
                return
            elif isinstance(condition, type):
                yield from self._fast_findall(condition)
                return
        # Check if `condition` is a class (check for TypeType for Python
        # implementations that use only new-style classes, like PyPy).
        if isinstance(condition, type):
            node_class = condition

            def condition(node, node_class=node_class):
                return isinstance(node, node_class)

        if include_self and (condition is None or condition(self)):
            yield self
        if descend:
            # Iterate over `children` directly: the base class does not
            # implement the sequence protocol, only `Element` does.
            for child in self.children:
                yield from child.findall(condition=condition,
                                         include_self=True, descend=True,
                                         siblings=False, ascend=False)
        if siblings or ascend:
            node = self
            while node.parent:
                index = node.parent.index(node)
                # extra check since Text nodes have value-equality
                while node.parent[index] is not node:
                    index = node.parent.index(node, index + 1)
                for sibling in node.parent[index+1:]:
                    yield from sibling.findall(
                        condition=condition,
                        include_self=True, descend=descend,
                        siblings=False, ascend=False)
                if not ascend:
                    break
                else:
                    node = node.parent

    def traverse(self,
                 condition: Callable[[Node], bool] | type | None = None,
                 include_self: bool = True,
                 descend: bool = True,
                 siblings: bool = False,
                 ascend: bool = False,
                 ) -> list[Node]:
        """Return list of nodes following `self`.

        For looping, Node.findall() is faster and more memory efficient.
        """
        # traverse() may be eventually removed:
        warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',
                      DeprecationWarning, stacklevel=2)
        return list(self.findall(condition, include_self, descend,
                                 siblings, ascend))

    def next_node(self,
                  condition: Callable[[Node], bool] | type | None = None,
                  include_self: bool = False,
                  descend: bool = True,
                  siblings: bool = False,
                  ascend: bool = False,
                  ) -> Node | None:
        """
        Return the first node in the iterator returned by findall(),
        or None if the iterable is empty.

        Parameter list is the same as of `findall()`.  Note that `include_self`
        defaults to False, though.
        """
        try:
            return next(self.findall(condition, include_self,
                                     descend, siblings, ascend))
        except StopIteration:
            return None

    def validate(self, recursive: bool = True) -> None:
        """Raise ValidationError if this node is not valid.

        Override in subclasses that define validity constraints.
        """

    def validate_position(self) -> None:
        """Hook for additional checks of the parent's content model.

        Raise ValidationError, if `self` is at an invalid position.

        Override in subclasses with complex validity constraints. See
        `subtitle.validate_position()` and `transition.validate_position()`.
        """

409

410

class Text(Node, str):  # NoQA: SLOT000 (Node doesn't define __slots__)
    """
    Terminal node (leaf) of the document tree holding text only.

    A `Text` node has neither children nor attributes.  Create one by
    passing a string to the constructor.

    ``str(<instance>)`` gives the raw (null-escaped) text,
    ``<instance>.astext()`` gives the unescaped text.
    """

    tagname: Final = '#text'

    children: Final = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data: str, rawsource: None = None) -> Self:
        """Create the node from `data`.

        Raise TypeError for `bytes` data and emit a DeprecationWarning
        if the deprecated `rawsource` argument is given.
        """
        if isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        if rawsource is not None:
            warnings.warn('nodes.Text: initialization argument "rawsource" '
                          'is ignored and will be removed in Docutils 2.0.',
                          DeprecationWarning, stacklevel=2)
        return str.__new__(cls, data)

    def shortrepr(self, maxlen: int = 18) -> str:
        """Return ``<#text: '...'>``, abbreviating text longer than `maxlen`."""
        text = str(self)
        if len(text) > maxlen:
            # keep room for the 4-character ellipsis marker
            text = text[:maxlen - 4] + ' ...'
        return f'<{self.tagname}: {text!r}>'

    def __repr__(self) -> str:
        return self.shortrepr(maxlen=68)

    def astext(self) -> str:
        """Return the unescaped text as a plain `str`."""
        unescaped = unescape(self)
        return str(unescaped)

    def _dom_node(self, domroot: minidom.Document) -> minidom.Text:
        raw_text = str(self)
        return domroot.createTextNode(raw_text)

    def copy(self) -> Self:
        """Return a new instance of the same class with the same text."""
        return type(self)(str(self))

    def deepcopy(self) -> Self:
        """Same as `copy()`: there are no children to copy."""
        return self.copy()

    # NOTE(review): the `indent` default is reproduced as shown in this
    # listing (a single space); confirm it against the real module, the
    # listing may have collapsed whitespace.
    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """Return an indented pseudo-XML representation of the text.

        With the document setting `detailed` true, show a ``<#text>``
        tag followed by the `repr()` of every raw line.  Otherwise (or
        when the setting cannot be looked up) show the unescaped lines.
        """
        try:
            if self.document.settings.detailed:
                header = indent * level + '<#text>'
                prefix = indent * (level + 1)
                body = [prefix + repr(chunk)
                        for chunk in self.splitlines(True)]
                return '\n'.join([header] + body) + '\n'
        except AttributeError:
            # no document or no `detailed` setting: use the plain format
            pass
        margin = indent * level
        return ''.join(margin + text_line + '\n'
                       for text_line in self.astext().splitlines())

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString.

    def rstrip(self, chars: str | None = None) -> Self:
        stripped = str.rstrip(self, chars)
        return type(self)(stripped)

    def lstrip(self, chars: str | None = None) -> Self:
        stripped = str.lstrip(self, chars)
        return type(self)(stripped)

482

483

484class Element(Node):

485 """

486 `Element` is the superclass to all specific elements.

487

488 Elements contain attributes and child nodes.

489 They can be described as a cross between a list and a dictionary.

490

491 Elements emulate dictionaries for external [#]_ attributes, indexing by

492 attribute name (a string). To set the attribute 'att' to 'value', do::

493

494 element['att'] = 'value'

495

496 .. [#] External attributes correspond to the XML element attributes.

497 From its `Node` superclass, Element also inherits "internal"

498 class attributes that are accessed using the standard syntax, e.g.

499 ``element.parent``.

500

501 There are two special attributes: 'ids' and 'names'. Both are

502 lists of unique identifiers: 'ids' conform to the regular expression

503 ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and

504 details). 'names' serve as user-friendly interfaces to IDs; they are

505 case- and whitespace-normalized (see the fully_normalize_name() function).

506

507 Elements emulate lists for child nodes (element nodes and/or text

508 nodes), indexing by integer. To get the first child node, use::

509

510 element[0]

511

512 to iterate over the child nodes (without descending), use::

513

514 for child in element:

515 ...

516

517 Elements may be constructed using the ``+=`` operator. To add one new

518 child node to element, do::

519

520 element += node

521

522 This is equivalent to ``element.append(node)``.

523

524 To add a list of multiple child nodes at once, use the same ``+=``

525 operator::

526

527 element += [node1, node2]

528

529 This is equivalent to ``element.extend([node1, node2])``.

530 """

531

532 list_attributes: Final = ('ids', 'classes', 'names', 'dupnames')

533 """Tuple of attributes that are initialized to empty lists.

534

535 NOTE: Derived classes should update this value when supporting

536 additional list attributes.

537 """

538

539 valid_attributes: Final = list_attributes + ('source',)

540 """Tuple of attributes that are valid for elements of this class.

541

542 NOTE: Derived classes should update this value when supporting

543 additional attributes.

544 """

545

546 common_attributes: Final = valid_attributes

547 """Tuple of `common attributes`__ known to all Doctree Element classes.

548

549 __ https://docutils.sourceforge.io/docs/ref/doctree.html#common-attributes

550 """

551

552 known_attributes: Final = common_attributes

553 """Alias for `common_attributes`. Will be removed in Docutils 2.0."""

554

555 basic_attributes: Final = list_attributes

556 """Common list attributes. Deprecated. Will be removed in Docutils 2.0."""

557

558 local_attributes: Final = ('backrefs',)

559 """Obsolete. Will be removed in Docutils 2.0."""

560

561 content_model: ClassVar[_ContentModelTuple] = ()

562 """Python representation of the element's content model (cf. docutils.dtd).

563

564 A tuple of ``(category, quantifier)`` tuples with

565

566 :category: class or tuple of classes that are expected at this place(s)

567 in the list of children

568 :quantifier: string representation stating how many elements

569 of `category` are expected. Value is one of:

570 '.' (exactly one), '?' (zero or one),

571 '+' (one or more), '*' (zero or more).

572

573 NOTE: The default describes the empty element. Derived classes should

574 update this value to match their content model.

575

576 Provisional.

577 """

578

579 tagname: str | None = None

580 """The element generic identifier.

581

582 If None, it is set as an instance attribute to the name of the class.

583 """

584

585 child_text_separator: Final = '\n\n'

586 """Separator for child nodes, used by `astext()` method."""

587

def __init__(self,
             rawsource: str = '',
             *children: Node,
             **attributes: Any,
             ) -> None:
    """Set up raw source, child nodes, and attributes of the element.

    :rawsource: the raw text from which this element was constructed
        (for informative and debugging purposes only; some elements
        leave it at the default '').
    :children: child nodes, added via `extend()` to maintain parent info.
    :attributes: initial attribute values; names are lower-cased.

    Raise TypeError if `rawsource` is an `Element`.
    """
    self.rawsource = rawsource
    if isinstance(rawsource, Element):
        raise TypeError('First argument "rawsource" must be a string.')

    # List of child nodes (elements and/or `Text`).
    self.children: list[Node] = []
    self.extend(children)  # maintain parent info

    # Dictionary of attribute {name: value}; every list attribute
    # starts out as its own empty list.
    self.attributes: dict[str, Any] = {
        name: [] for name in self.list_attributes}

    for raw_name, value in attributes.items():
        name = raw_name.lower()  # normalize attribute name
        if name in self.list_attributes:
            # lists are mutable; make a copy for this node
            value = value[:]
        self.attributes[name] = value

    if self.tagname is None:
        self.tagname: str = self.__class__.__name__

625

def _dom_node(self, domroot: minidom.Document) -> minidom.Element:
    """Return a DOM element for this node, created via `domroot`.

    All non-default attributes are set as strings (list values are
    joined with spaces after escaping each item) and the DOM nodes of
    all children are appended.
    """
    dom_element = domroot.createElement(self.tagname)
    for att_name, att_value in self.attlist():
        if isinstance(att_value, list):
            escaped = [serial_escape('%s' % (item,)) for item in att_value]
            att_value = ' '.join(escaped)
        dom_element.setAttribute(att_name, '%s' % att_value)
    for child_node in self.children:
        dom_element.appendChild(child_node._dom_node(domroot))
    return dom_element

635

def __repr__(self) -> str:
    """Return ``<tagname: children>`` with an abbreviated child summary.

    The summary concatenates the children's `shortrepr()` and is cut
    off once it exceeds 60 characters.  Names, if any, are shown in
    quotes after the tag name.
    """
    summary = ''
    for child in self.children:
        summary += child.shortrepr()
        if len(summary) > 60:
            summary = summary[:56] + ' ...'
            break
    names = self['names']
    if not names:
        return f'<{self.tagname}: {summary}>'
    joined_names = '; '.join(names)
    return f'<{self.tagname} "{joined_names}": {summary}>'

648

def shortrepr(self) -> str:
    """Return a concise ``<tagname...>`` form, including names if set."""
    names = self['names']
    if not names:
        return f'<{self.tagname}...>'
    joined_names = '; '.join(names)
    return f'<{self.tagname} "{joined_names}"...>'

654

def __str__(self) -> str:
    """Return the element as start tag + children + end tag.

    An element without children is rendered as an empty tag.
    """
    if not self.children:
        return self.emptytag()
    pieces = [self.starttag()]
    pieces.extend(str(child) for child in self.children)
    pieces.append(self.endtag())
    return ''.join(pieces)

662

def starttag(self, quoteattr: Callable[[str], str] | None = None) -> str:
    """Return the start tag including all non-default attributes.

    `quoteattr` quotes an attribute value; it defaults to
    `pseudo_quoteattr` (the optional argument is used by the
    docutils_xml writer).  Value conversion:

    * None (boolean attribute) becomes ``name="True"`` (not passed
      to `quoteattr`),
    * True/False become "1"/"0",
    * list items are escaped and joined with spaces,
    * anything else is converted with `str()`.
    """
    quote = pseudo_quoteattr if quoteattr is None else quoteattr
    tokens = [self.tagname]
    for att_name, att_value in self.attlist():
        if att_value is None:  # boolean attribute
            tokens.append('%s="True"' % att_name)
            continue
        if isinstance(att_value, bool):
            text = str(int(att_value))
        elif isinstance(att_value, list):
            text = ' '.join(serial_escape('%s' % (item,))
                            for item in att_value)
        else:
            text = str(att_value)
        tokens.append('%s=%s' % (att_name, quote(text)))
    return '<%s>' % ' '.join(tokens)

682

def endtag(self) -> str:
    """Return the end tag ``</tagname>``."""
    return f'</{self.tagname}>'

685

def emptytag(self) -> str:
    """Return an empty-element tag with all non-default attributes.

    Attribute values are inserted between double quotes as is
    (plain ``%s`` formatting).
    """
    tokens = [self.tagname]
    for att_name, att_value in self.attlist():
        tokens.append('%s="%s"' % (att_name, att_value))
    return '<%s/>' % ' '.join(tokens)

689

def __len__(self) -> int:
    """Return the number of direct child nodes."""
    child_nodes = self.children
    return len(child_nodes)

692

def __contains__(self, key: str | Node) -> bool:
    """Support ``in`` for both attribute names and child nodes.

    A `str` key (this includes `str` subclasses) is looked up in the
    attributes, any other key in the list of children.
    """
    container = self.attributes if isinstance(key, str) else self.children
    return key in container

698

@overload
def __getitem__(self, key: str) -> Any:
    ...

@overload
def __getitem__(self, key: int) -> Node:
    ...

@overload
def __getitem__(self, key: slice) -> list[Node]:
    ...

def __getitem__(self,
                key: str | int | slice,
                ) -> Node | list[Node] | Any:
    """Return an attribute value, a child node, or a list of children.

    :str key: attribute name; return the attribute value.
    :int key: child index; return the child node.
    :slice key: return a list of children (step must be None or 1).

    Raise TypeError for any other kind of key.
    """
    if isinstance(key, str):
        return self.attributes[key]
    if isinstance(key, int):
        return self.children[key]
    if not isinstance(key, slice):
        raise TypeError('element index must be an integer, a slice, or '
                        'an attribute name string')
    assert key.step in (None, 1), 'cannot handle slice with stride'
    return self.children[key.start:key.stop]

724

@overload
def __setitem__(self, key: str, item: Any) -> None:
    ...

@overload
def __setitem__(self, key: int, item: Node) -> None:
    ...

@overload
def __setitem__(self, key: slice, item: Iterable[Node]) -> None:
    ...

def __setitem__(self, key, item) -> None:
    """Set an attribute value, a child node, or a range of child nodes.

    :str key: attribute name; `item` becomes the attribute value.
    :int key: child index; `item` replaces the child at that position.
    :slice key: `item` is an iterable of nodes replacing the children
        in the slice (step must be None or 1).

    New child nodes are passed to `setup_child()` before they are
    stored.  Raise TypeError for any other kind of key.
    """
    if isinstance(key, str):
        self.attributes[str(key)] = item
    elif isinstance(key, int):
        self.setup_child(item)
        self.children[key] = item
    elif isinstance(key, slice):
        assert key.step in (None, 1), 'cannot handle slice with stride'
        # Materialize first: `item` is walked twice (set-up and
        # assignment) and a one-shot iterator would be empty the
        # second time, silently inserting nothing.
        new_nodes = list(item)
        for node in new_nodes:
            self.setup_child(node)
        self.children[key.start:key.stop] = new_nodes
    else:
        raise TypeError('element index must be an integer, a slice, or '
                        'an attribute name string')

751

def __delitem__(self, key: str | int | slice) -> None:
    """Delete an attribute, a child node, or a range of child nodes.

    :str key: attribute name.
    :int key: child index.
    :slice key: range of children (step must be None or 1).

    Raise TypeError for any other kind of key.
    """
    if isinstance(key, str):
        del self.attributes[key]
        return
    if isinstance(key, int):
        del self.children[key]
        return
    if not isinstance(key, slice):
        raise TypeError('element index must be an integer, a simple '
                        'slice, or an attribute name string')
    assert key.step in (None, 1), 'cannot handle slice with stride'
    del self.children[key.start:key.stop]

763

def __add__(self, other: list[Node]) -> list[Node]:
    """Return a new list: the children followed by the nodes in `other`."""
    own_nodes = self.children
    return own_nodes + other

766

def __radd__(self, other: list[Node]) -> list[Node]:
    """Return a new list: the nodes in `other` followed by the children."""
    own_nodes = self.children
    return other + own_nodes

769

def __iadd__(self, other: Node | Iterable[Node]) -> Self:
    """Append a node or a list of nodes to `self.children`.

    A single `Node` is appended, any other value except None is
    treated as an iterable of nodes.  None is ignored.
    """
    if isinstance(other, Node):
        self.append(other)
        return self
    if other is not None:
        self.extend(other)
    return self

777

def astext(self) -> str:
    """Return the children's text joined with `child_text_separator`."""
    child_texts = []
    for child in self.children:
        child_texts.append(child.astext())
    return self.child_text_separator.join(child_texts)

781

def non_default_attributes(self) -> dict[str, Any]:
    """Return a dict of the attributes for which `is_not_default()` holds."""
    selected = {}
    for name, value in self.attributes.items():
        if self.is_not_default(name):
            selected[name] = value
    return selected

786

def attlist(self) -> list[tuple[str, Any]]:
    """Return the non-default attributes as sorted (name, value) pairs."""
    pairs = list(self.non_default_attributes().items())
    pairs.sort()
    return pairs

789

def get(self, key: str, failobj: Any | None = None) -> Any:
    """Return the value of attribute `key`, or `failobj` if it is not set."""
    attribute_map = self.attributes
    return attribute_map.get(key, failobj)

792

def hasattr(self, attr: str) -> bool:
    """Return True if the attribute `attr` is set on this element."""
    attribute_map = self.attributes
    return attr in attribute_map

795

def delattr(self, attr: str) -> None:
    """Remove the attribute `attr`; do nothing if it is not set."""
    self.attributes.pop(attr, None)

799

def setdefault(self, key: str, failobj: Any | None = None) -> Any:
    """Return attribute `key`, setting it to `failobj` first if missing."""
    attribute_map = self.attributes
    return attribute_map.setdefault(key, failobj)

802

803 has_key = hasattr

804

def get_language_code(self, fallback: str = '') -> str:
    """Return node's language tag.

    Look in self for a "classes" value starting with ``language-``
    and return the remainder of it (which should be a `BCP 47`
    language tag).  If there is none, ask the parent.  Return
    `fallback` if no ancestor provides a language tag.
    """
    prefix = 'language-'
    for class_value in self.get('classes', []):
        if class_value.startswith(prefix):
            return class_value[len(prefix):]
    try:
        return self.parent.get_language_code(fallback)
    except AttributeError:
        # no parent (or the parent cannot be asked)
        return fallback

819

def append(self, item: Node) -> None:
    """Add `item` as last child, after `setup_child()` has prepared it."""
    self.setup_child(item)
    child_nodes = self.children
    child_nodes.append(item)

823

def extend(self, item: Iterable[Node]) -> None:
    """Append every node of the iterable `item`, one `append()` each."""
    nodes = iter(item)
    for new_child in nodes:
        self.append(new_child)

827

def insert(self,
           index: SupportsIndex,
           item: Node | Iterable[Node],
           ) -> None:
    """Insert a node or an iterable of nodes before position `index`.

    A single `Node` is set up and inserted.  Any other value except
    None is assigned to the empty slice ``self[index:index]``.
    None is ignored.
    """
    if isinstance(item, Node):
        self.setup_child(item)
        self.children.insert(index, item)
        return
    if item is not None:
        self[index:index] = item

837

def pop(self, i: int = -1) -> Node:
    """Remove and return the child at index `i` (default: the last)."""
    child_nodes = self.children
    return child_nodes.pop(i)

840

def remove(self, item: Node) -> None:
    """Remove the first child equal to `item` (ValueError if absent)."""
    child_nodes = self.children
    child_nodes.remove(item)

843

def index(self,
          item: Node,
          start: int = 0,
          stop: int = sys.maxsize,
          ) -> int:
    """Return the index of the first child equal to `item`.

    The search is limited to ``children[start:stop]``; ValueError is
    raised if `item` is not found there.
    """
    child_nodes = self.children
    return child_nodes.index(item, start, stop)

850

def previous_sibling(self) -> Node | None:
    """Return preceding sibling node or ``None``.

    ``None`` is returned for the first child and for nodes whose
    parent cannot be searched (e.g. no parent at all).
    """
    container = self.parent
    try:
        position = container.index(self)
    except AttributeError:
        return None
    if position > 0:
        return container[position - 1]
    return None

858

def is_not_default(self, key: str) -> bool:
    """Return False only for a list attribute that is an empty list.

    Every other attribute value counts as "not default".
    """
    is_empty_list_attribute = (self[key] == []
                               and key in self.list_attributes)
    return not is_empty_list_attribute

864

def update_basic_atts(self, dict_: Mapping[str, Any] | Element) -> None:
    """
    Add the values of the basic attributes (the names listed in
    `basic_attributes`) found in node or dictionary `dict_` to the
    corresponding list attributes of `self`.

    Values already present are not added again.

    Provisional.
    """
    source_atts = dict_.attributes if isinstance(dict_, Node) else dict_
    for name in self.basic_attributes:
        self.append_attr_list(name, source_atts.get(name, []))

876

def append_attr_list(self, attr: str, values: Iterable[Any]) -> None:
    """
    Append to self[attr] every item of `values` it does not contain yet.

    NOTE: Requires self[attr] and values to be sequence type and the
    former should specifically be a list.
    """
    for candidate in values:
        target = self[attr]
        if candidate in target:
            continue
        target.append(candidate)

889

def coerce_append_attr_list(
        self, attr: str, value: list[Any] | Any) -> None:
    """
    Make sure that both self[attr] and `value` are lists (wrapping a
    non-list in a list of one element), then call `append_attr_list()`.

    NOTE: self[attr] and value both must not be None.
    """
    if not isinstance(self.get(attr), list):
        self[attr] = [self[attr]]
    new_values = value if isinstance(value, list) else [value]
    self.append_attr_list(attr, new_values)

905

def replace_attr(self, attr: str, value: Any, force: bool = True) -> None:
    """
    Set self[attr] to `value` if `force` is true (the default) or
    if self[attr] is missing or None.  Otherwise do nothing.
    """
    if not force and self.get(attr) is not None:
        return  # keep the existing value
    self[attr] = value

914

def copy_attr_convert(
        self, attr: str, value: Any, replace: bool = True) -> None:
    """
    If attr is an attribute of self, set self[attr] to
    [self[attr], value], otherwise set self[attr] to value.

    Nothing happens if `value` is the very object already stored
    (or if attr is missing and `value` is None).

    NOTE: replace is not used by this function and is kept only for
          compatibility with the other copy functions.
    """
    if self.get(attr) is value:
        return
    if attr not in self.attributes:
        # Nothing to merge with: `coerce_append_attr_list()` would
        # fail with a KeyError when wrapping the missing value.
        self[attr] = value
        return
    self.coerce_append_attr_list(attr, value)

926

def copy_attr_coerce(self, attr: str, value: Any, replace: bool) -> None:
    """
    If attr is an attribute of self and either self[attr] or value is a
    list, convert all non-sequence values to a sequence of 1 element and
    then concatenate the two sequence, setting the result to self[attr].
    If both self[attr] and value are non-sequences and replace is True or
    self[attr] is None, replace self[attr] with value. Otherwise, do
    nothing.

    If attr is not set on self and value is a list, the result is a new
    list with the (unique) items of value.
    """
    current = self.get(attr)
    if current is value:
        return
    if not (isinstance(current, list) or isinstance(value, list)):
        self.replace_attr(attr, value, replace)
        return
    if attr not in self.attributes:
        # Concatenate with an empty list instead of failing with a
        # KeyError inside `coerce_append_attr_list()`.
        self[attr] = []
    self.coerce_append_attr_list(attr, value)

942

def copy_attr_concatenate(
        self, attr: str, value: Any, replace: bool) -> None:
    """
    Merge `value` into self[attr]:

    * if self[attr] and `value` are both lists, add the items of
      `value` that are not yet in self[attr],
    * otherwise hand over to `replace_attr()`, which sets self[attr]
      to `value` if `replace` is true or self[attr] is None.

    Nothing happens if `value` is the very object already stored.
    """
    current = self.get(attr)
    if current is value:
        return
    both_are_lists = isinstance(current, list) and isinstance(value, list)
    if both_are_lists:
        self.append_attr_list(attr, value)
    else:
        self.replace_attr(attr, value, replace)

958

def copy_attr_consistent(
        self, attr: str, value: Any, replace: bool) -> None:
    """
    Replace self[attr] with `value` if `replace` is true or
    self[attr] is None (see `replace_attr()`).  Otherwise, do nothing.

    Nothing happens if `value` is the very object already stored.
    """
    if self.get(attr) is value:
        return
    self.replace_attr(attr, value, replace)

967

def update_all_atts(self,
                    dict_: Mapping[str, Any] | Element,
                    update_fun: _UpdateFun = copy_attr_consistent,
                    replace: bool = True,
                    and_source: bool = False,
                    ) -> None:
    """
    Update the attributes of `self` from a node or mapping `dict_`.

    If `dict_` is a `Node`, its ``attributes`` mapping is used.
    The basic attributes are merged first with `update_basic_atts()`.
    Every remaining attribute name of `dict_` that passes the filter is
    then handed to ``update_fun(self, name, value, replace)``, which
    decides how the old and the new value are combined.

    The filter depends on `and_source`:

    - true: `is_not_list_attribute()` (everything that is not listed in
      ``list_attributes``, so 'source' is included in the copy),
    - false: `is_not_known_attribute()` (everything that is not listed
      in ``common_attributes``).

    NOTE: With `replace` false, an existing 'source' attribute is not
          replaced, though `update_fun` may still merge it into a list.
    NOTE: It is easier to call one of the ``update_all_atts_*()``
          methods than to pass `update_fun` to this method.
    """
    attributes = dict_.attributes if isinstance(dict_, Node) else dict_

    # Select which of the remaining attribute names get copied.
    wanted = (self.is_not_list_attribute if and_source
              else self.is_not_known_attribute)

    # The basic attributes are always appended.
    self.update_basic_atts(attributes)

    # Merge the other attributes one by one (lazily, while iterating).
    for att in attributes:
        if wanted(att):
            update_fun(self, att, attributes[att], replace)

1009

def update_all_atts_consistantly(self,
                                 dict_: Mapping[str, Any] | Element,
                                 replace: bool = True,
                                 and_source: bool = False,
                                 ) -> None:
    """
    Update all attributes from node or dictionary `dict_`, replacing.

    Shorthand for `update_all_atts()` with `copy_attr_consistent()` as
    merge function: the basic attributes are appended; any other
    attribute present in both `self` and `dict_` takes the value from
    `dict_` if `replace` is true and keeps its value otherwise.
    `replace` and `and_source` are passed on unchanged; a true
    `and_source` includes the 'source' attribute in the copy.
    """
    merge = Element.copy_attr_consistent
    self.update_all_atts(dict_, merge, replace, and_source)

1033

def update_all_atts_concatenating(self,
                                  dict_: Mapping[str, Any] | Element,
                                  replace: bool = True,
                                  and_source: bool = False,
                                  ) -> None:
    """
    Update all attributes from node or dictionary `dict_`, concatenating.

    Shorthand for `update_all_atts()` with `copy_attr_concatenate()` as
    merge function: the basic attributes are appended; for any other
    attribute, two list values are concatenated, while all other
    combinations are replaced (if `replace` is true) or preserved.
    `replace` and `and_source` are passed on unchanged; a true
    `and_source` includes the 'source' attribute in the copy.
    """
    merge = Element.copy_attr_concatenate
    self.update_all_atts(dict_, merge, replace, and_source)

1060

def update_all_atts_coercion(self,
                             dict_: Mapping[str, Any] | Element,
                             replace: bool = True,
                             and_source: bool = False,
                             ) -> None:
    """
    Update all attributes from node or dictionary `dict_`, coercing.

    Shorthand for `update_all_atts()` with `copy_attr_coerce()` as merge
    function: the basic attributes are appended; for any other
    attribute, if either value is a list, both are combined through
    `coerce_append_attr_list()`; two non-list values are replaced (if
    `replace` is true) or preserved.
    `replace` and `and_source` are passed on unchanged; a true
    `and_source` includes the 'source' attribute in the copy.
    """
    merge = Element.copy_attr_coerce
    self.update_all_atts(dict_, merge, replace, and_source)

1088

def update_all_atts_convert(self,
                            dict_: Mapping[str, Any] | Element,
                            and_source: bool = False,
                            ) -> None:
    """
    Update all attributes from node or dictionary `dict_`, converting.

    Shorthand for `update_all_atts()` with `copy_attr_convert()` as
    merge function: the basic attributes are appended; every other
    attribute is combined with the new value through
    `coerce_append_attr_list()`.

    There is no `replace` argument: `copy_attr_convert()` ignores it,
    so the default of `update_all_atts()` is used.  A true `and_source`
    includes the 'source' attribute in the copy.
    """
    merge = Element.copy_attr_convert
    self.update_all_atts(dict_, merge, and_source=and_source)

1112

def clear(self) -> None:
    """Drop all children by binding ``self.children`` to a new list."""
    self.children = list()

1115

def replace(self, old: Node, new: Node | Iterable[Node]) -> None:
    """Replace the child `old` with a node or an iterable of nodes.

    The position of `old` is looked up with ``self.index(old)`` first.
    A single `Node` is passed to `setup_child()` and stored at that
    position.  Any other value except None is assigned to the
    one-element slice at that position.  With ``new=None``, the
    children are left unchanged.
    """
    position = self.index(old)
    if new is None:
        return
    if isinstance(new, Node):
        self.setup_child(new)
        self[position] = new
        return
    self[position:position+1] = new

1124

def replace_self(self, new: Node | Sequence[Node]) -> None:
    """
    Substitute `new` (a node or a list of nodes) for `self` in the parent.

    The basic attributes of `self` are handed over to `new` (or, for a
    list, to its first item) with `update_basic_atts()` if that node is
    an `Element`.  Otherwise there is nowhere to store them and an
    assertion checks that all of them are empty.

    Provisional: the handling of node attributes will be revised.
    """
    if isinstance(new, Node):
        heir = new
    else:
        # `new` is a list; its first item inherits the attributes.
        try:
            heir = new[0]
        except IndexError:
            heir = None
    if isinstance(heir, Element):
        heir.update_basic_atts(self)
    else:
        # `heir` is a Text node or `new` is an empty list.
        # Assert that we aren't losing any attributes.
        for att in self.basic_attributes:
            assert not self[att], \
                'Losing "%s" attribute: %s' % (att, self[att])
    self.parent.replace(self, new)

1148

def first_child_matching_class(self,
                               childclass: type[Element] | type[Text]
                               | tuple[type[Element] | type[Text], ...],
                               start: int = 0,
                               end: int = sys.maxsize,
                               ) -> int | None:
    """
    Return the index of the first child that is an instance of `childclass`.

    Return None if no child in the searched range matches.

    Parameters:

    - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
      classes. If a tuple, any of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    classes = childclass if isinstance(childclass, tuple) else (childclass,)
    for index in range(start, min(len(self), end)):
        if isinstance(self[index], classes):
            return index
    return None

1172

def first_child_not_matching_class(
        self,
        childclass: type[Element] | type[Text]
        | tuple[type[Element] | type[Text], ...],
        start: int = 0,
        end: int = sys.maxsize,
        ) -> int | None:
    """
    Return the index of the first child that is no instance of `childclass`.

    Return None if every child in the searched range matches.

    Parameters:

    - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
      classes. If a tuple, none of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    classes = childclass if isinstance(childclass, tuple) else (childclass,)
    for index in range(start, min(len(self), end)):
        if not isinstance(self.children[index], classes):
            return index
    return None

1199

# NOTE(review): the default `indent` reads as a single space in this
# copy of the source; runs of spaces may have been collapsed by the
# extraction -- confirm against the upstream file.
def pformat(self, indent: str = ' ', level: int = 0) -> str:
    """Return an indented, multi-line text representation of the subtree.

    The start tag of `self` is prefixed with `level` repetitions of
    `indent` and terminated by a newline; it is followed by the
    `pformat()` output of every child at ``level + 1``.
    """
    parts = ['%s%s\n' % (indent*level, self.starttag())]
    parts.extend(child.pformat(indent, level+1) for child in self.children)
    return ''.join(parts)

1204

def copy(self) -> Self:
    """Return a childless copy of `self`.

    A new instance of the same class is created from ``self.rawsource``
    and the items of ``self.attributes``; the ``_document``, ``source``,
    and ``line`` values are carried over afterwards.
    """
    duplicate = self.__class__(rawsource=self.rawsource, **self.attributes)
    for name in ('_document', 'source', 'line'):
        setattr(duplicate, name, getattr(self, name))
    return duplicate

1211

def deepcopy(self) -> Self:
    """Return a copy of `self` including deep copies of all children.

    The node itself is duplicated with `copy()`; the result is extended
    with the `deepcopy()` of every child.
    """
    clone = self.copy()
    cloned_children = [child.deepcopy() for child in self.children]
    clone.extend(cloned_children)
    return clone

1216

def note_referenced_by(self,
                       name: str | None = None,
                       id: str | None = None,
                       ) -> None:
    """Record that this Element was referenced via name `name` or id `id`.

    Sets ``self.referenced`` to True.  Nodes registered under `name` in
    ``self.expect_referenced_by_name`` or under `id` in
    ``self.expect_referenced_by_id`` (where these dictionaries exist)
    are marked as referenced, too.
    """
    self.referenced = True
    # Element.expect_referenced_by_* dictionaries map names or ids
    # to nodes whose ``referenced`` attribute is set to true as
    # soon as this node is referenced by the given name or id.
    # Needed for target propagation.
    lookups = (('expect_referenced_by_name', name),
               ('expect_referenced_by_id', id))
    # Both lookups are done before any dependent node is touched.
    dependents = [(getattr(self, registry, {}).get(key), key)
                  for registry, key in lookups]
    for dependent, key in dependents:
        if dependent:
            assert key is not None
            dependent.referenced = True

1236

@classmethod
def is_not_list_attribute(cls, attr: str) -> bool:
    """
    Return True if `attr` is NOT contained in ``cls.list_attributes``,
    the list-valued attributes of the class.
    """
    listed = attr in cls.list_attributes
    return not listed

1244

@classmethod
def is_not_known_attribute(cls, attr: str) -> bool:
    """
    Return True if `attr` is NOT contained in ``cls.common_attributes``,
    i.e. is not defined for all Element instances.

    Provisional. May be removed in Docutils 2.0.
    """
    known = attr in cls.common_attributes
    return not known

1253

def validate_attributes(self) -> None:
    """Normalize and validate element attributes.

    Every attribute value is replaced by the result of the matching
    function in `ATTRIBUTE_VALIDATORS` (conversion to the expected
    datatype, normalization).  Attributes whose name starts with
    "internal:" are skipped.

    All problems are collected and reported together:
    raise `ValidationError` if an attribute name is not listed in
    ``self.valid_attributes`` or if the validating function fails with
    a ValueError, TypeError, or KeyError.

    Provisional.
    """
    # NOTE(review): whitespace following "\n" in the messages may have
    # been collapsed in this copy of the source -- confirm upstream.
    problems = []
    for key, value in self.attributes.items():
        if key.startswith('internal:'):
            continue  # see docs/user/config.html#expose-internals
        if key in self.valid_attributes:
            try:
                self.attributes[key] = ATTRIBUTE_VALIDATORS[key](value)
            except (ValueError, TypeError, KeyError) as e:
                problems.append(
                    f'Attribute "{key}" has invalid value "{value}".\n {e}')
        else:
            va = '", "'.join(self.valid_attributes)
            problems.append(f'Attribute "{key}" not one of "{va}".')
    if not problems:
        return
    raise ValidationError(f'Element {self.starttag()} invalid:\n '
                          + '\n '.join(problems),
                          problematic_element=self)

1281

def validate_content(self,
                     model: _ContentModelTuple | None = None,
                     elements: Sequence[Node] | None = None,
                     ) -> list[Node]:
    """Test compliance of `elements` with `model`.

    :model: content model description, default `self.content_model`,
            a sequence of ``(category, quantifier)`` pairs with the
            quantifiers '.', '?', '+', and '*'.
    :elements: list of doctree elements, default `self.children`.

    The elements are matched against the model parts in order.
    Return the list of elements left over after the last model part
    (empty, if all elements fit in the model) or raise
    `ValidationError` if a part with quantifier '.' or '+' has no
    matching element.

    Provisional.
    """
    if model is None:
        model = self.content_model
    if elements is None:
        elements = self.children
    pending = iter(elements)
    child = next(pending, None)
    for category, quantifier in model:
        if isinstance(child, category):
            # Check additional placement constraints (if applicable):
            child.validate_position()
        elif quantifier in ('.', '+'):  # a child is required here
            raise ValidationError(self._report_child(child, category),
                                  problematic_element=child)
        else:  # quantifier in ('?', '*') -> optional child
            continue  # try same child with next part of content model
        # advance:
        if quantifier in ('.', '?'):  # go to next element
            child = next(pending, None)
            continue
        # quantifier in ('*', '+'): pass all matching elements
        for child in pending:
            if not isinstance(child, category):
                break  # `child` is tested against the next model part
            try:
                child.validate_position()
            except AttributeError:
                pass
        else:
            child = None  # no elements left
    if child is None:
        return []
    return [child, *pending]

1326

def _report_child(self,
                  child: Node | None,
                  category: Element | Iterable[Element],
                  ) -> str:
    # Return a str reporting a missing child or child of wrong category.
    #
    # `category` is a class or an iterable of classes; `child` is the
    # offending node or None if a required child is missing.
    try:
        expected = category.__name__
    except AttributeError:  # several alternative classes
        expected = '> or <'.join(c.__name__ for c in category)
    intro = f'Element {self.starttag()} invalid:\n'
    if child is None:
        return f'{intro} Missing child of type <{expected}>.'
    if isinstance(child, Text):
        found = f'text data "{child.astext()}"'
    else:
        found = child.starttag()
    return f'{intro} Expecting child of type <{expected}>, not {found}.'

1344

def validate(self, recursive: bool = True) -> None:
    """Validate Docutils Document Tree element ("doctree").

    The attributes are checked with `validate_attributes()`, the
    children with `validate_content()`.  Raise `ValidationError` if
    there are violations; this includes children left over by
    `validate_content()`.
    If `recursive` is True, validate also the element's descendants.

    See `The Docutils Document Tree`__ for details of the
    Docutils Document Model.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html

    Provisional (work in progress).
    """
    self.validate_attributes()

    # The first left-over child (if any) is reported.
    for stray in self.validate_content():
        intro = f'Element {self.starttag()} invalid:\n'
        if isinstance(stray, Text):
            raise ValidationError(
                intro + f' Spurious text: "{stray.astext()}".',
                problematic_element=self)
        raise ValidationError(
            intro + f' Child element {stray.starttag()} '
            'not allowed at this position.',
            problematic_element=stray)

    if not recursive:
        return
    for child in self:
        child.validate(recursive=recursive)

1375

1376

1377# ====================

1378# Element Categories

1379# ====================

1380#

1381# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-hierarchy.

1382

class Root:
    """Category class: the element at the root of a document tree."""

1385

1386

class Structural:
    """Category class for the `structural elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-elements
    """

1393

1394

class SubStructural:
    """Category class for the `structural subelements`__.

    Structural subelements are children of `Structural` elements.
    Most `Structural` elements accept only specific `SubStructural`
    elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-subelements
    """

1403

1404

class Bibliographic:
    """Category class for the `bibliographic elements`__.

    Bibliographic elements hold displayed document meta-data.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #bibliographic-elements
    """

1411

1412

class Body:
    """Category class for the `body elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-elements
    """

1418

1419

class Admonition(Body):
    """Body elements that are distinctive and self-contained notices."""

    # DTD content model: (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )

1423

1424

class Sequential(Body):
    """Category class for body elements that are list-like."""

1427

1428

class General(Body):
    """Category class for miscellaneous body elements."""

1431

1432

class Special(Body):
    """Category class for special, internal body elements."""

1435

1436

class Part:
    """Category class for the `body subelements`__.

    Body subelements always occur within specific parent elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-subelements
    """

1442

1443

class Decorative:
    """Category class for the decorative elements `header` and `footer`.

    Decorative elements are children of `decoration`.
    """

    # DTD content model: (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )

1450

1451

class Inline:
    """Category class for inline elements.

    Inline elements contain text data and possibly other inline elements.
    """

1455

1456

1457# Orthogonal categories and Mixins

1458# ================================

1459

class PreBibliographic:
    """Category class: elements allowed in front of Bibliographic Elements."""

1462

1463

class Invisible(Special, PreBibliographic):
    """Category class: internal elements that do not show up in the output."""

1466

1467

class Labeled:
    """Category class: elements with a `label` as first child element."""

1470

1471

class Resolvable:
    # Class-level default for the "resolved" flag of instances.
    resolved: bool = False

1474

1475

class BackLinkable:
    """Mixin class for Elements supporting the "backrefs" attribute."""

    # Extend the attribute declarations of `Element` by "backrefs".
    list_attributes: Final = Element.list_attributes + ('backrefs',)
    valid_attributes: Final = Element.valid_attributes + ('backrefs',)

    def add_backref(self: Element, refid: str) -> None:
        """Append `refid` to the element's "backrefs" list."""
        backrefs = self['backrefs']
        backrefs.append(refid)

1484

1485

class Referential(Resolvable):
    """Category class: elements with a cross-reference (outgoing hyperlink)."""

1488

1489

class Targetable(Resolvable):
    """Category class: cross-reference targets (incoming hyperlink)."""

    # Class-level default; false until the target is referenced.
    referenced: int = 0

    indirect_reference_name: str | None = None
    """The whitespace_normalized_name (contains mixed case) of a target.

    Required for MoinMoin/reST compatibility.

    Provisional.
    """

1500

1501

class Titular:
    """Category class: title, sub-title, or informal heading (rubric)."""

1504

1505

class TextElement(Element):
    """
    An element which directly contains text.

    The children of a `TextElement` are `Text` nodes or instances of
    `Inline` subclasses.  To find out whether an element is in an inline
    context, test whether its immediate parent is a `TextElement`
    instance (including subclasses).  This is handy for nodes like
    `image` that can appear both inline and as standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """

    # DTD content model: (#PCDATA | %inline.elements;)*
    content_model: Final = (((Text, Inline), '*'),)

    child_text_separator: Final = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children: Node,
                 **attributes: Any,
                 ) -> None:
        """Set up the element with `text` as leading `Text` child.

        A non-empty `text` is wrapped in a `Text` node and placed in
        front of `children`; an empty `text` is left out.
        """
        leading = (Text(text),) if text else ()
        Element.__init__(self, rawsource, *leading, *children,
                         **attributes)

1537

1538

class FixedTextElement(TextElement):
    """A `TextElement` whose direct content is preformatted text."""

    # "xml:space" is accepted in addition to the attributes of `Element`.
    valid_attributes: Final = Element.valid_attributes + ('xml:space',)

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children: Node,
                 **attributes: Any,
                 ) -> None:
        """Initialize like `TextElement`, then force xml:space="preserve".

        The attribute is set after the parent initialization, so an
        "xml:space" value passed in `attributes` is overwritten.
        """
        super().__init__(rawsource, text, *children, **attributes)
        self.attributes['xml:space'] = 'preserve'

1552

1553

class PureTextElement(TextElement):
    """A `TextElement` that contains text only, no child elements."""

    # DTD content model: (#PCDATA)
    content_model: Final = (
        (Text, '?'),
    )

1557

1558

1559# =================================

1560# Concrete Document Tree Elements

1561# =================================

1562#

1563# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-reference

1564

1565# Decorative Elements

1566# ===================

1567

class header(Decorative, Element):
    # Decorative element; a child of `decoration`.
    pass

class footer(Decorative, Element):
    # Decorative element; a child of `decoration`.
    pass

1570

1571

1572# Structural Subelements

1573# ======================

1574

class title(Titular, PreBibliographic, SubStructural, TextElement):
    """Title of a `document`, `section`, `topic`, or generic `admonition`.
    """

    # "auto" and "refid" are accepted in addition to the attributes
    # of `Element`.
    valid_attributes: Final = Element.valid_attributes + (
        'auto', 'refid')

1579

1580

class subtitle(Titular, PreBibliographic, SubStructural, TextElement):
    """Sub-title of a `document`, `section`, or `sidebar`."""

    def validate_position(self) -> None:
        """Check position of subtitle: must follow a title.

        Raise `ValidationError` if the subtitle is the first child of
        its parent.  A subtitle without parent is not checked.
        """
        parent = self.parent
        if parent and parent.index(self) == 0:
            raise ValidationError(f'Element {parent.starttag()} invalid:'
                                  '\n <subtitle> only allowed after <title>.',
                                  problematic_element=self)

1590

1591

class meta(PreBibliographic, SubStructural, Element):
    """Container for meta-data ("invisible" bibliographic data)."""

    # Attributes accepted in addition to those of `Element`.
    valid_attributes: Final = Element.valid_attributes + (
        'content',
        'dir',
        'http-equiv',
        'lang',
        'media',
        'name',
        'scheme',
    )

1596

1597

class docinfo(SubStructural, Element):
    """Container for the displayed meta-data of a document."""

    # DTD content model: (%bibliographic.elements;)+
    content_model: Final = (
        (Bibliographic, '+'),
    )

1602

1603

class decoration(PreBibliographic, SubStructural, Element):
    """Container for the decorative elements `header` and `footer`."""

    # DTD content model: (header?, footer?)
    # An empty element doesn't make sense, but is simpler to define.
    content_model: Final = (
        (header, '?'),
        (footer, '?'),
    )

    def get_header(self) -> header:
        """Return the `header` child; insert a new one in front if missing.

        The header is expected to be the first child.
        """
        children = self.children
        if not (len(children) and isinstance(children[0], header)):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self) -> footer:
        """Return the `footer` child; append a new one if missing.

        The footer is expected to be the last child.
        """
        children = self.children
        if not (len(children) and isinstance(children[-1], footer)):
            self.append(footer())
        return self.children[-1]

1620

1621

class transition(SubStructural, Element):
    """Transitions__ are breaks between untitled text parts.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition
    """

    def validate_position(self) -> None:
        """Check additional constraints on `transition` placement.

        A transition may not begin or end a section or document,
        nor may two transitions be immediately adjacent.

        All violations found are reported in a single `ValidationError`.
        """
        intro = f'Element {self.parent.starttag()} invalid:'
        problems = []
        predecessor = self.previous_sibling()
        # A transition following title, subtitle, meta, or decoration
        # still counts as "at the beginning of a document or section".
        if (predecessor is None  # index == 0
            or isinstance(predecessor,
                          (title, subtitle, meta, decoration))):
            problems.append(
                '<transition> may not begin a section or document.')
        if self.parent.index(self) == len(self.parent) - 1:
            problems.append(
                '<transition> may not end a section or document.')
        if isinstance(predecessor, transition):
            problems.append(
                '<transition> may not directly follow another transition.')
        if problems:
            raise ValidationError('\n '.join([intro, *problems]),
                                  problematic_element=self)

1651

1652

1653# Structural Elements

1654# ===================

1655

class topic(Structural, Element):
    """
    A topic__ is a non-recursive mini-section.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#topic
    """

    # DTD content model: (title?, (%body.elements;)+)
    content_model: Final = (
        (title, '?'),
        (Body, '+'),
    )

1664

1665

class sidebar(Structural, Element):
    """
    A sidebar__ is like a parallel document providing related material.

    It is typically offset by a border and "floats" to the side
    of the page.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#sidebar
    """

    # DTD content model:
    #   ((title, subtitle?)?, (%body.elements; | topic)+)
    # "subtitle only after title" is ensured in
    # `subtitle.validate_position()`.
    content_model: Final = (
        (title, '?'),
        (subtitle, '?'),
        ((topic, Body), '+'),
    )

1681

1682

class section(Structural, Element):
    """A document section__, the main unit of hierarchy.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#section
    """
    # The content model is recursive (it refers to `section` itself);
    # it is therefore assigned after the class definition, see below.

1689

1690

# Content model of `section`, assigned after the class definition
# because it refers to `section` itself.
# DTD content model: (title, subtitle?, %structure.model;)
# Correct transition placement is ensured in `transition.validate_position()`.
section.content_model = (
    (title, '.'),
    (subtitle, '?'),
    ((Body, topic, sidebar, transition), '*'),
    ((section, transition), '*'),
)

1698

1699

1700# Root Element

1701# ============

1702

1703class document(Root, Element):

1704 """

1705 The document root element.

1706

1707 Do not instantiate this class directly; use

1708 `docutils.utils.new_document()` instead.

1709 """

1710 valid_attributes: Final = Element.valid_attributes + ('title',)

1711 content_model: Final = ((title, '?'),

1712 (subtitle, '?'),

1713 (meta, '*'),

1714 (decoration, '?'),

1715 (docinfo, '?'),

1716 (transition, '?'),

1717 ((Body, topic, sidebar, transition), '*'),

1718 ((section, transition), '*'),

1719 )

1720 # ( (title, subtitle?)?,

1721 # meta*,

1722 # decoration?,

1723 # (docinfo, transition?)?,

1724 # %structure.model; )

1725 # Additional restrictions for `subtitle` and `transition` are tested

1726 # with the respective `validate_position()` methods.

1727

def __init__(self,
             settings: Values,
             reporter: Reporter,
             *args: Node,
             **kwargs: Any,
             ) -> None:
    """Set up the document root and its bookkeeping data structures.

    :settings: runtime settings, stored as ``self.settings``.
    :reporter: system message generator, stored as ``self.reporter``.
    :args, kwargs: passed on to `Element.__init__()`.

    All registries (targets, references, footnotes, IDs, messages)
    start out empty.
    """
    Element.__init__(self, *args, **kwargs)

    self.current_source: StrPath | None = None
    """Path to or description of the input source being processed."""

    self.current_line: int | None = None
    """Line number (1-based) of `current_source`."""

    self.settings: Values = settings
    """Runtime settings data record."""

    self.reporter: Reporter = reporter
    """System message generator."""

    self.indirect_targets: list[target] = []
    """List of indirect target nodes."""

    self.substitution_defs: dict[str, substitution_definition] = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names: dict[str, str] = {}
    """Mapping of case-normalized to case-sensitive substitution names."""

    self.refnames: dict[str, list[Element]] = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids: dict[str, list[Element]] = {}
    """Mapping of ids to lists of referencing nodes."""

    self.nameids: dict[str, str] = {}
    """Mapping of names to unique id's."""

    self.nametypes: dict[str, bool] = {}
    """Mapping of names to hyperlink type. True: explicit, False: implicit.
    """

    self.ids: dict[str, Element] = {}
    """Mapping of ids to nodes."""

    self.footnote_refs: dict[str, list[footnote_reference]] = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs: dict[str, list[citation_reference]] = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes: list[footnote] = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs: list[footnote_reference] = []
    """List of auto-numbered footnote_reference nodes."""

    self.symbol_footnotes: list[footnote] = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs: list[footnote_reference] = []
    """List of symbol footnote_reference nodes."""

    self.footnotes: list[footnote] = []
    """List of manually-numbered footnote nodes."""

    self.citations: list[citation] = []
    """List of citation nodes."""

    self.autofootnote_start: int = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnote_start: int = 0
    """Initial symbol footnote symbol index."""

    # The counter is keyed by ID prefix strings (see `set_id()`),
    # so the type parameter of `Counter` is `str`.
    self.id_counter: Counter[str] = Counter()
    """Numbers added to otherwise identical IDs."""

    self.parse_messages: list[system_message] = []
    """System messages generated while parsing."""

    self.transform_messages: list[system_message] = []
    """System messages generated while applying transforms."""

    # Delayed import (see the import section at the top of the module).
    import docutils.transforms
    self.transformer: Transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.include_log: list[tuple[StrPath, tuple]] = []
    """The current source's parents (to detect inclusion loops)."""

    self.decoration: decoration | None = None
    """Document's `decoration` node."""

    # The root element is its own document.
    self._document: document = self

1823

def __getstate__(self) -> dict[str, Any]:
    """
    Return a copy of the instance dictionary for pickling.

    The 'reporter' and 'transformer' entries (unpicklable references)
    are set to None in the copy; `self` is not modified.
    """
    return {**self.__dict__, 'reporter': None, 'transformer': None}

1832

def asdom(self, dom: ModuleType | None = None) -> minidom.Document:
    """Return a DOM representation of this document.

    :dom: module providing the DOM implementation (it must offer a
          ``Document`` class), default: `xml.dom.minidom`.

    The DOM node returned by ``self._dom_node()`` becomes the only
    child of a new DOM document.
    """
    if dom is None:
        from xml.dom import minidom as dom
    dom_document = dom.Document()
    root_node = self._dom_node(dom_document)
    dom_document.appendChild(root_node)
    return dom_document

1840

def set_id(self,
           node: Element,
           msgnode: Element | None = None,
           suggested_prefix: str = '',
           ) -> str:
    """Register the IDs of `node` or generate a new one; return an ID.

    If `node` already has IDs, each is registered in ``self.ids``.
    An ID that is already registered for a different node is reported
    as an error (the system message is added to `msgnode` if given).
    The last ID of `node` is returned.

    Otherwise, a new ID is derived from the first name of `node` that
    yields an unused ID.  If there is none, a numbered ID is generated
    (a trailing "%" of the prefix is replaced by `suggested_prefix`
    or the normalized tag name).  The new ID is appended to
    ``node['ids']``, registered, and returned.
    """
    if node['ids']:
        # register and check for duplicates
        for known_id in node['ids']:
            self.ids.setdefault(known_id, node)
            if self.ids[known_id] is node:
                continue
            msg = self.reporter.error(f'Duplicate ID: "{known_id}" used by '
                                      f'{self.ids[known_id].starttag()} '
                                      f'and {node.starttag()}',
                                      base_node=node)
            if msgnode is not None:
                msgnode += msg
        return known_id
    # generate and set id
    id_prefix = self.settings.id_prefix
    auto_id_prefix = self.settings.auto_id_prefix
    base_id = ''
    new_id = ''
    for name in node['names']:
        # with an id_prefix, allow names starting with numbers
        base_id = make_id('x'+name)[1:] if id_prefix else make_id(name)
        # TODO: normalize id-prefix? (would make code simpler)
        new_id = id_prefix + base_id
        if base_id and new_id not in self.ids:
            break  # unused name-derived ID found
    else:
        if base_id and auto_id_prefix.endswith('%'):
            # disambiguate name-derived ID
            # TODO: remove second condition after announcing change
            prefix = new_id + '-'
        else:
            prefix = id_prefix + auto_id_prefix
            if prefix.endswith('%'):
                stem = suggested_prefix or make_id(node.tagname)
                prefix = f'{prefix[:-1]}{stem}-'
        # append a running number until the ID is unused
        while True:
            self.id_counter[prefix] += 1
            new_id = f'{prefix}{self.id_counter[prefix]}'
            if new_id not in self.ids:
                break
    node['ids'].append(new_id)
    self.ids[new_id] = node
    return new_id

1890

def set_name_id_map(self,
                    node: Element,
                    id: str,
                    msgnode: Element | None = None,
                    explicit: bool = False,
                    ) -> None:
    """
    `self.nameids` maps names to IDs, while `self.nametypes` maps names to
    booleans representing hyperlink type (True==explicit,
    False==implicit).  This method updates the mappings.

    The following state transition table shows how `self.nameids` items
    ("id") and `self.nametypes` items ("type") change with new input
    (a call to this method), and what actions are performed
    ("implicit"-type system messages are INFO/1, and
    "explicit"-type system messages are ERROR/3):

    ====  =====  ========  ========  =======  ====  =====  =====
     Old State    Input          Action        New State   Notes
    -----------  --------  -----------------  -----------  -----
    id    type   new type  sys.msg.  dupname  id    type
    ====  =====  ========  ========  =======  ====  =====  =====
    -     -      explicit  -         -        new   True
    -     -      implicit  -         -        new   False
    -     False  explicit  -         -        new   True
    old   False  explicit  implicit  old      new   True
    -     True   explicit  explicit  new      -     True
    old   True   explicit  explicit  new,old  -     True   [#]_
    -     False  implicit  implicit  new      -     False
    old   False  implicit  implicit  new,old  -     False
    -     True   implicit  implicit  new      -     True
    old   True   implicit  implicit  new      old   True
    ====  =====  ========  ========  =======  ====  =====  =====

    .. [#] Do not clear the name-to-id map or invalidate the old target if
       both old and new targets are external and refer to identical URIs.
       The new target is invalidated regardless.
    """
    # Iterate over a snapshot: handling a duplicate modifies node['names'].
    for name in tuple(node['names']):
        if name not in self.nameids:
            # first occurrence of this name: just record it
            self.nameids[name] = id
            self.nametypes[name] = explicit
            continue
        self.set_duplicate_name_id(node, id, name, msgnode, explicit)

1936

def set_duplicate_name_id(self,
                          node: Element,
                          id: str,
                          name: str,
                          msgnode: Element | None,
                          explicit: bool,
                          ) -> None:
    """
    Update the name mappings for a `name` that is already registered.

    Called by `set_name_id_map()` when `name` is already a key of
    `self.nameids`.  Depending on the old and new hyperlink type
    (explicit or implicit), this updates `self.nameids` and
    `self.nametypes`, calls `dupname()` on the old and/or new node,
    and generates a system message.

    :node: the new element carrying `name`.
    :id: the ID of `node`; used as back reference of the system message.
    :name: the duplicate name.
    :msgnode: if not None, the system message is added to this element.
    :explicit: hyperlink type of the new target (True==explicit).
    """
    old_id = self.nameids[name]
    old_explicit = self.nametypes[name]
    # the name counts as explicit once any target using it was explicit
    self.nametypes[name] = old_explicit or explicit
    if explicit:
        if old_explicit:
            # two explicit targets with the same name
            level = 2
            if old_id is not None:
                old_node = self.ids[old_id]
                if 'refuri' in node:
                    refuri = node['refuri']
                    if (old_node['names']
                        and 'refuri' in old_node
                        and old_node['refuri'] == refuri):
                        level = 1  # just inform if refuri's identical
                if level > 1:
                    # invalidate the old target and clear the mapping
                    dupname(old_node, name)
                    self.nameids[name] = None
            msg = self.reporter.system_message(
                level, 'Duplicate explicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            if msgnode is not None:
                msgnode += msg
            dupname(node, name)
        else:
            # the new explicit target overrides the old implicit one
            self.nameids[name] = id
            if old_id is not None:
                old_node = self.ids[old_id]
                dupname(old_node, name)
    else:
        # new implicit target
        if old_id is not None and not old_explicit:
            # two implicit targets: invalidate the old one as well
            self.nameids[name] = None
            old_node = self.ids[old_id]
            dupname(old_node, name)
        dupname(node, name)
    if not explicit or (not old_explicit and old_id is not None):
        msg = self.reporter.info(
            'Duplicate implicit target name: "%s".' % name,
            backrefs=[id], base_node=node)
        if msgnode is not None:
            msgnode += msg

1984

def has_name(self, name: str) -> bool:
    """Return True if `name` is a key of `self.nameids`."""
    registered_names = self.nameids
    return name in registered_names

1987

1988 # "note" here is an imperative verb: "take note of".

def note_implicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Give `target` an ID and register its names as implicit targets.

    System messages are added to `msgnode`, if given.
    """
    self.set_name_id_map(target, self.set_id(target, msgnode), msgnode,
                         explicit=False)

1993

def note_explicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Give `target` an ID and register its names as explicit targets.

    System messages are added to `msgnode`, if given.
    """
    self.set_name_id_map(target, self.set_id(target, msgnode), msgnode,
                         explicit=True)

1998

def note_refname(self, node: Element) -> None:
    """Add `node` to the `self.refnames` list for its "refname"."""
    referencing_nodes = self.refnames.setdefault(node['refname'], [])
    referencing_nodes.append(node)

2001

def note_refid(self, node: Element) -> None:
    """Add `node` to the `self.refids` list for its "refid"."""
    referencing_nodes = self.refids.setdefault(node['refid'], [])
    referencing_nodes.append(node)

2004

def note_indirect_target(self, target: target) -> None:
    """Record `target` in `self.indirect_targets`.

    A target with names is also passed to `note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)

2009

def note_anonymous_target(self, target: target) -> None:
    """Make sure `target` has a registered ID (see `set_id()`)."""
    self.set_id(target)

2012

def note_autofootnote(self, footnote: footnote) -> None:
    """Give `footnote` an ID and append it to `self.autofootnotes`."""
    self.set_id(footnote)
    collected = self.autofootnotes
    collected.append(footnote)

2016

def note_autofootnote_ref(self, ref: footnote_reference) -> None:
    """Give `ref` an ID and append it to `self.autofootnote_refs`."""
    self.set_id(ref)
    collected = self.autofootnote_refs
    collected.append(ref)

2020

def note_symbol_footnote(self, footnote: footnote) -> None:
    """Give `footnote` an ID and append it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    collected = self.symbol_footnotes
    collected.append(footnote)

2024

def note_symbol_footnote_ref(self, ref: footnote_reference) -> None:
    """Give `ref` an ID and append it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    collected = self.symbol_footnote_refs
    collected.append(ref)

2028

def note_footnote(self, footnote: footnote) -> None:
    """Give `footnote` an ID and append it to `self.footnotes`."""
    self.set_id(footnote)
    collected = self.footnotes
    collected.append(footnote)

2032

def note_footnote_ref(self, ref: footnote_reference) -> None:
    """Give `ref` an ID and file it under its "refname".

    The reference is added to the `self.footnote_refs` list for its
    "refname" and then passed to `note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.footnote_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)

2037

def note_citation(self, citation: citation) -> None:
    """Append `citation` to `self.citations`."""
    collected = self.citations
    collected.append(citation)

2040

def note_citation_ref(self, ref: citation_reference) -> None:
    """Give `ref` an ID and file it under its "refname".

    The reference is added to the `self.citation_refs` list for its
    "refname" and then passed to `note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.citation_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)

2045

def note_substitution_def(self,
                          subdef: substitution_definition,
                          def_name: str,
                          msgnode: Element | None = None,
                          ) -> None:
    """
    Register the substitution definition `subdef` under `def_name`.

    The name is whitespace-normalized.  If a definition with this name
    exists already, report an error (the message is added to `msgnode`,
    if given) and call `dupname()` on the old definition.
    """
    name = whitespace_normalize_name(def_name)
    definitions = self.substitution_defs
    if name in definitions:
        msg = self.reporter.error(
              'Duplicate substitution definition name: "%s".' % name,
              base_node=subdef)
        if msgnode is not None:
            msgnode += msg
        dupname(definitions[name], name)
    # keep only the last definition:
    definitions[name] = subdef
    # case-insensitive mapping:
    self.substitution_names[fully_normalize_name(name)] = name

2064

def note_substitution_ref(self,
                          subref: substitution_reference,
                          refname: str,
                          ) -> None:
    """Store the whitespace-normalized `refname` in ``subref['refname']``."""
    normalized = whitespace_normalize_name(refname)
    subref['refname'] = normalized

2070

def note_pending(
        self, pending: pending, priority: int | None = None) -> None:
    """Pass `pending` and `priority` to ``self.transformer.add_pending()``."""
    transformer = self.transformer
    transformer.add_pending(pending, priority)

2074

def note_parse_message(self, message: system_message) -> None:
    """Append `message` to `self.parse_messages`."""
    collected = self.parse_messages
    collected.append(message)

2077

def note_transform_message(self, message: system_message) -> None:
    """Append `message` to `self.transform_messages`."""
    collected = self.transform_messages
    collected.append(message)

2080

def note_source(self,
                source: StrPath | None,
                offset: int | None,
                ) -> None:
    """
    Update `self.current_source` and `self.current_line`.

    A true `source` is converted with `os.fspath()`; a false value
    (e.g. None) is stored unchanged.  `self.current_line` becomes
    ``offset + 1``, or None if `offset` is None.
    """
    self.current_source = os.fspath(source) if source else source
    self.current_line = None if offset is None else offset + 1

2090

def copy(self) -> Self:
    """Return a new instance of this class without children.

    The copy is created with this instance's settings, reporter, and
    attributes; `source` and `line` are carried over.
    """
    cls = self.__class__
    clone = cls(self.settings, self.reporter, **self.attributes)
    clone.source, clone.line = self.source, self.line
    return clone

2097

def get_decoration(self) -> decoration:
    """Return the document's `decoration` node, creating it on demand.

    A new `decoration` element is inserted before the first child that
    is neither a `Titular` nor a `meta` element, or appended if there is
    no such child.
    """
    if self.decoration:
        return self.decoration
    self.decoration: decoration = decoration()
    position = self.first_child_not_matching_class((Titular, meta))
    if position is not None:
        self.insert(position, self.decoration)
    else:
        self.append(self.decoration)
    return self.decoration

2107

2108

2109# Bibliographic Elements

2110# ======================

2111

# Simple bibliographic elements.  Each class combines the `Bibliographic`
# category with a text element base class (`FixedTextElement` for
# `address`, `TextElement` for the others) and adds nothing of its own.
class author(Bibliographic, TextElement): pass
class organization(Bibliographic, TextElement): pass
class address(Bibliographic, FixedTextElement): pass
class contact(Bibliographic, TextElement): pass
class version(Bibliographic, TextElement): pass
class revision(Bibliographic, TextElement): pass
class status(Bibliographic, TextElement): pass
class date(Bibliographic, TextElement): pass
class copyright(Bibliographic, TextElement): pass  # NoQA: A001 (builtin name)

2121

2122

class authors(Bibliographic, Element):
    """Container for author information for documents with multiple authors.
    """
    content_model: Final = ((author, '+'),
                            (organization, '?'),
                            (address, '?'),
                            (contact, '?'),
                            )
    # (author, organization?, address?, contact?)+

    def validate_content(self,
                         model: _ContentModelTuple | None = None,
                         elements: Sequence[Node] | None = None,
                         ) -> list[Node]:
        """Apply the content model repeatedly until no children are left.

        The base class validation is run first without arguments and
        then again on the returned leftover elements for as long as any
        remain.  The arguments `model` and `elements` are not used.

        Provisional.
        """
        leftovers = super().validate_content()
        while leftovers:
            leftovers = super().validate_content(elements=leftovers)
        return leftovers

2145

2146

2147# Body Elements

2148# =============

2149#

2150# General

2151# -------

2152#

2153# Miscellaneous Body Elements and related Body Subelements (Part)

2154

# Text elements in the `General` category (`rubric` is also `Titular`).
class paragraph(General, TextElement): pass
class rubric(Titular, General, TextElement): pass


class compound(General, Element):
    """General element containing one or more `Body` elements."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class container(General, Element):
    """General element containing one or more `Body` elements."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class attribution(Part, TextElement):
    """Visible reference to the source of a `block_quote`."""


class block_quote(General, Element):
    """An extended quotation, set off from the main text."""
    content_model: Final = ((Body, '+'), (attribution, '?'))
    # ((%body.elements;)+, attribution?)

2175

2176

2177# Lists

2178# -----

2179#

2180# Lists (Sequential) and related Body Subelements (Part)

2181

class list_item(Part, Element):
    """Item of a `bullet_list` or `enumerated_list`."""
    content_model: Final = ((Body, '*'),)  # (%body.elements;)*


class bullet_list(Sequential, Element):
    """List of one or more `list_item` elements; accepts "bullet"."""
    valid_attributes: Final = Element.valid_attributes + ('bullet',)
    content_model: Final = ((list_item, '+'),)  # (list_item+)


class enumerated_list(Sequential, Element):
    """List of one or more `list_item` elements.

    Accepts the attributes "enumtype", "prefix", "suffix", and "start".
    """
    valid_attributes: Final = Element.valid_attributes + (
        'enumtype', 'prefix', 'suffix', 'start')
    content_model: Final = ((list_item, '+'),)  # (list_item+)


# Text parts of a `definition_list_item`.
class term(Part, TextElement): pass
class classifier(Part, TextElement): pass


class definition(Part, Element):
    """Definition of a `term` in a `definition_list`."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class definition_list_item(Part, Element):
    """Item of a `definition_list`: term(s), classifiers, and a definition."""
    content_model: Final = ((term, '.'),
                            ((classifier, term), '*'),
                            (definition, '.'),
                            )
    # ((term, classifier*)+, definition)


class definition_list(Sequential, Element):
    """List of terms and their definitions.

    Can be used for glossaries or dictionaries, to describe or
    classify things, for dialogues, or to itemize subtopics.
    """
    content_model: Final = ((definition_list_item, '+'),)
    # (definition_list_item+)

2222

2223

# Text part of a `field` (the first child in the field's content model).
class field_name(Part, TextElement): pass


class field_body(Part, Element):
    """Second child of a `field`; may hold any number of `Body` elements."""
    content_model: Final = ((Body, '*'),)  # (%body.elements;)*


class field(Part, Bibliographic, Element):
    """Pair of a `field_name` and a `field_body`."""
    content_model: Final = ((field_name, '.'), (field_body, '.'))
    # (field_name, field_body)


class field_list(Sequential, Element):
    """List of label & data pairs.

    Typically rendered as a two-column list.
    Also used for extension syntax or special processing.
    """
    content_model: Final = ((field, '+'),)  # (field+)


class option_string(Part, PureTextElement):
    """A literal command-line option. Typically monospaced."""

2247

2248

class option_argument(Part, PureTextElement):
    """Placeholder text for option arguments."""
    valid_attributes: Final = Element.valid_attributes + ('delimiter',)

    def astext(self) -> str:
        """Return the text, preceded by the "delimiter" (default: a space)."""
        delimiter = self.get('delimiter', ' ')
        return delimiter + TextElement.astext(self)

2255

2256

class option(Part, Element):
    """Option element in an `option_list_item`.

    Groups an option string with zero or more option argument placeholders.
    """
    child_text_separator: Final = ''
    content_model: Final = ((option_string, '.'), (option_argument, '*'))
    # (option_string, option_argument*)


class option_group(Part, Element):
    """Groups together one or more `option` elements, all synonyms."""
    child_text_separator: Final = ', '
    content_model: Final = ((option, '+'),)  # (option+)


class description(Part, Element):
    """Description of a command-line option."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class option_list_item(Part, Element):
    """Container for a pair of `option_group` and `description` elements.
    """
    # NOTE(review): whitespace runs inside string literals may be collapsed
    # in this listing -- confirm the separator against the upstream file.
    child_text_separator: Final = ' '
    content_model: Final = ((option_group, '.'), (description, '.'))
    # (option_group, description)


class option_list(Sequential, Element):
    """Two-column list of command-line options and descriptions."""
    content_model: Final = ((option_list_item, '+'),)  # (option_list_item+)

2289

2290

2291# Pre-formatted text blocks

2292# -------------------------

2293

# Pre-formatted text elements in the `General` category.
class literal_block(General, FixedTextElement): pass
class doctest_block(General, FixedTextElement): pass


class math_block(General, FixedTextElement, PureTextElement):
    """Mathematical notation (display formula)."""


class line(Part, TextElement):
    """Single line of text in a `line_block`."""
    indent: str | None = None


class line_block(General, Element):
    """Sequence of lines and nested line blocks.
    """
    # recursive content model: (line | line_block)+


# The content model refers to `line_block` itself, so it can only be
# assigned once the class exists.
line_block.content_model = (((line, line_block), '+'),)

2314

2315

2316# Admonitions

2317# -----------

2318# distinctive and self-contained notices

2319

# Specific admonitions.  Each class only combines the `Admonition`
# category with `Element`; nothing else is defined here.
class attention(Admonition, Element): pass
class caution(Admonition, Element): pass
class danger(Admonition, Element): pass
class error(Admonition, Element): pass
class important(Admonition, Element): pass
class note(Admonition, Element): pass
class tip(Admonition, Element): pass
class hint(Admonition, Element): pass
class warning(Admonition, Element): pass


class admonition(Admonition, Element):
    """Admonition with a `title` followed by one or more `Body` elements."""
    content_model: Final = ((title, '.'), (Body, '+'))
    # (title, (%body.elements;)+)

2334

2335

2336# Footnote and citation

2337# ---------------------

2338

class label(Part, PureTextElement):
    """Visible identifier for footnotes and citations."""


class footnote(General, BackLinkable, Element, Labeled, Targetable):
    """Labelled note providing additional context (footnote or endnote)."""
    valid_attributes: Final = Element.valid_attributes + ('auto', 'backrefs')
    content_model: Final = ((label, '?'), (Body, '+'))
    # (label?, (%body.elements;)+)
    # The label will become required in Docutils 1.0.


class citation(General, BackLinkable, Element, Labeled, Targetable):
    """Element with a `label` followed by one or more `Body` elements."""
    content_model: Final = ((label, '.'), (Body, '+'))
    # (label, (%body.elements;)+)

2354

2355

2356# Graphical elements

2357# ------------------

2358

class image(General, Inline, Element):
    """Reference to an image resource.

    May be body element or inline element.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading')

    def astext(self) -> str:
        """Return the value of the "alt" attribute (default: '')."""
        alternative_text = self.get('alt', '')
        return alternative_text

2369

2370

# Text part of a `figure` (see the figure's content model).
class caption(Part, TextElement): pass


class legend(Part, Element):
    """A wrapper for text accompanying a `figure` that is not the caption."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class figure(General, Element):
    """A formal figure, generally an illustration, with a title."""
    valid_attributes: Final = Element.valid_attributes + ('align', 'width')
    content_model: Final = ((image, '.'),
                            (caption, '?'),
                            (legend, '?'),
                            )
    # (image, ((caption, legend?) | legend))
    # TODO: According to the DTD, a caption or legend is required
    # but rST allows "bare" figures which are formatted differently from
    # images (floating in LaTeX, nested in a <figure> in HTML). [bugs: #489]

2390

2391

2392# Tables

2393# ------

2394

class entry(Part, Element):
    """An entry in a `row` (a table cell)."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colsep', 'morecols',
        'morerows', 'namest', 'nameend', 'rowsep', 'valign')
    content_model: Final = ((Body, '*'),)
    # %tbl.entry.mdl -> (%body.elements;)*


class row(Part, Element):
    """Row of table cells."""
    valid_attributes: Final = Element.valid_attributes + ('rowsep', 'valign')
    content_model: Final = ((entry, '+'),)  # (%tbl.row.mdl;) -> entry+

2408

2409

class colspec(Part, Element):
    """Specifications for a column in a `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colnum',
        'colsep', 'colwidth', 'rowsep', 'stub')

    def propwidth(self) -> int|float:
        """Return numerical value of "colwidth__" attribute. Default 1.

        Raise ValueError if "colwidth" is zero, negative, or a *fixed value*.

        Provisional.

        __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
        """
        # Move current implementation of validate_colwidth() here
        # in Docutils 1.0
        colwidth = self.get('colwidth', '')
        return validate_colwidth(colwidth)

2428

2429

class thead(Part, Element):
    """Row(s) that form the head of a `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    content_model: Final = ((row, '+'),)  # (row+)


class tbody(Part, Element):
    """Body of a `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    content_model: Final = ((row, '+'),)  # (row+)


class tgroup(Part, Element):
    """A portion of a table. Most tables have just one `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'cols', 'colsep', 'rowsep')
    content_model: Final = ((colspec, '*'), (thead, '?'), (tbody, '.'))
    # (colspec*, thead?, tbody)


class table(General, Element):
    """A data arrangement with rows and columns."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width')
    content_model: Final = ((title, '?'), (tgroup, '+'))
    # (title?, tgroup+)

2456

2457

2458# Special purpose elements

2459# ------------------------

2460# Body elements for internal use or special requests.

2461

class comment(Invisible, FixedTextElement, PureTextElement):
    """Author notes, hidden from the output."""


class substitution_definition(Invisible, TextElement):
    """Invisible text element accepting the "ltrim" and "rtrim" attributes.

    Instances are registered with `document.note_substitution_def()`.
    """
    valid_attributes: Final = Element.valid_attributes + ('ltrim', 'rtrim')


class target(Invisible, Inline, TextElement, Targetable):
    """Invisible, targetable inline text element.

    Accepts the attributes "anonymous", "refid", "refname", and "refuri".
    """
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'refid', 'refname', 'refuri')

2473

2474

class system_message(Special, BackLinkable, PreBibliographic, Element):
    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """
    valid_attributes: Final = BackLinkable.valid_attributes + (
                                  'level', 'line', 'type')
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

    def __init__(self,
                 message: str | None = None,
                 *children: Node,
                 **attributes: Any,
                 ) -> None:
        """Initialize the element.

        A non-empty `message` is wrapped in a `paragraph` that becomes
        the first child.  The keyword argument "rawsource" is removed
        from `attributes` and passed on as the element's raw source.
        """
        rawsource = attributes.pop('rawsource', '')
        if message:
            children = (paragraph('', message), *children)
        try:
            Element.__init__(self, rawsource, *children, **attributes)
        except:  # NoQA: E722 (catchall)
            # show the offending children, then let the error propagate
            print('system_message: children=%r' % (children,))
            raise

    def astext(self) -> str:
        """Return "source:line: (type/level) text" for this message."""
        line = self.get('line', '')
        fields = (self['source'], line, self['type'],
                  self['level'], Element.astext(self))
        return '%s:%s: (%s/%s) %s' % fields

2505

2506

class pending(Invisible, Element):
    """
    Placeholder for pending operations.

    The "pending" element is used to encapsulate a pending operation: the
    operation (transform), the point at which to apply it, and any data it
    requires.  Only the pending operation's location within the document is
    stored in the public document tree (by the "pending" object itself); the
    operation and its data are stored in the "pending" object's internal
    instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within the
    document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire document
    has been parsed and possibly transformed to some extent.  So the directive
    code leaves a placeholder behind that will trigger the second phase of its
    processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all pending
    transforms.
    """

    def __init__(self,
                 transform: Transform,
                 details: Mapping[str, Any] | None = None,
                 rawsource: str = '',
                 *children: Node,
                 **attributes: Any,
                 ) -> None:
        """Initialize the element and store `transform` and `details`.

        A false `details` value (e.g. None) is replaced by an empty dict.
        """
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform: Transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        self.details: Mapping[str, Any] = details or {}
        """Detail data (dictionary) required by the pending operation."""

    # NOTE(review): runs of whitespace inside the string literals of this
    # method (and in the default of `indent`) may be collapsed in this
    # listing -- confirm the exact literals against the upstream file.
    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """Return the pretty-printed element plus its internal attributes.

        The output of `Element.pformat()` is followed by lines naming the
        transform class and listing the `details` items sorted by key.
        Detail values that are a `Node` (or a non-empty list whose first
        item is a `Node`) are shown via their own ``pformat()``, other
        values via ``repr()``.
        """
        internals = ['.. internal attributes:',
                     ' .transform: %s.%s' % (self.transform.__module__,
                                             self.transform.__name__),
                     ' .details:']
        details = sorted(self.details.items())
        for key, value in details:
            if isinstance(value, Node):
                internals.append('%7s%s:' % ('', key))
                internals.extend(['%9s%s' % ('', line)
                                  for line in value.pformat().splitlines()])
            elif (value
                  and isinstance(value, list)
                  and isinstance(value[0], Node)):
                internals.append('%7s%s:' % ('', key))
                for v in value:
                    internals.extend(['%9s%s' % ('', line)
                                      for line in v.pformat().splitlines()])
            else:
                internals.append('%7s%s: %r' % ('', key, value))
        return (Element.pformat(self, indent, level)
                + ''.join((' %s%s\n' % (indent * level, line))
                          for line in internals))

    def copy(self) -> Self:
        """Return a copy without children.

        The copy shares `transform` and `details` with the original and
        keeps its raw source, attributes, document, source, and line.
        """
        obj = self.__class__(self.transform, self.details, self.rawsource,
                             **self.attributes)
        obj._document = self._document
        obj.source = self.source
        obj.line = self.line
        return obj

2583

2584

class raw(Special, Inline, PreBibliographic,
          FixedTextElement, PureTextElement):
    """Raw data that is to be passed untouched to the Writer.

    Can be used as Body element or Inline element.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'format', 'xml:space')


# Inline Elements
# ===============

# Simple inline elements: `Inline` text elements without additional
# attributes or content model.
class abbreviation(Inline, TextElement): pass
class acronym(Inline, TextElement): pass
class emphasis(Inline, TextElement): pass
class generated(Inline, TextElement): pass
class inline(Inline, TextElement): pass
class literal(Inline, TextElement): pass
class strong(Inline, TextElement): pass
class subscript(Inline, TextElement): pass
class superscript(Inline, TextElement): pass
class title_reference(Inline, TextElement): pass


class reference(General, Inline, Referential, TextElement):
    """Referential text element, usable as `General` or `Inline` element."""
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'name', 'refid', 'refname', 'refuri')


class footnote_reference(Inline, Referential, PureTextElement):
    """Inline reference accepting "auto", "refid", and "refname"."""
    valid_attributes: Final = Element.valid_attributes + (
        'auto', 'refid', 'refname')


class citation_reference(Inline, Referential, PureTextElement):
    """Inline reference accepting "refid" and "refname"."""
    valid_attributes: Final = Element.valid_attributes + ('refid', 'refname')


class substitution_reference(Inline, TextElement):
    """Inline text element accepting the "refname" attribute.

    The "refname" is set by `document.note_substitution_ref()`.
    """
    valid_attributes: Final = Element.valid_attributes + ('refname',)


class math(Inline, PureTextElement):
    """Mathematical notation in running text."""


class problematic(Inline, TextElement):
    """Inline text element accepting "refid", "refname", and "refuri"."""
    valid_attributes: Final = Element.valid_attributes + (
        'refid', 'refname', 'refuri')

2635

2636

2637# ========================================

2638# Auxiliary Classes, Functions, and Data

2639# ========================================

2640

# One string per (roughly) initial letter; the pieces are joined and split
# at whitespace, so the grouping has no effect on the resulting list.
node_class_names: Sequence[str] = ' '.join((
    'Text',
    'abbreviation acronym address admonition attention attribution author',
    'authors',
    'block_quote bullet_list',
    'caption caution citation citation_reference classifier colspec comment',
    'compound contact container copyright',
    'danger date decoration definition definition_list definition_list_item',
    'description docinfo doctest_block document',
    'emphasis entry enumerated_list error',
    'field field_body field_list field_name figure footer',
    'footnote footnote_reference',
    'generated',
    'header hint',
    'image important inline',
    'label legend line line_block list_item literal literal_block',
    'math math_block meta',
    'note',
    'option option_argument option_group option_list option_list_item',
    'option_string organization',
    'paragraph pending problematic',
    'raw reference revision row rubric',
    'section sidebar status strong subscript substitution_definition',
    'substitution_reference subtitle superscript system_message',
    'table target tbody term tgroup thead tip title title_reference topic',
    'transition',
    'version',
    'warning',
)).split()
"""A list of names of all concrete Node subclasses."""

2670

2671

class NodeVisitor:
    """
    "Visitor" pattern [GoF95]_ abstract superclass implementation for
    document tree traversals.

    Each node class has corresponding methods, doing nothing by
    default; override individual methods for specific and useful
    behaviour.  The `dispatch_visit()` method is called by
    `Node.walk()` upon entering a node.  `Node.walkabout()` also calls
    the `dispatch_departure()` method before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.

    This is a base class for visitors whose ``visit_...`` & ``depart_...``
    methods must be implemented for *all* compulsory node types encountered
    (such as for `docutils.writers.Writer` subclasses).
    Unimplemented methods will raise exceptions (except for optional nodes).

    For sparse traversals, where only certain node types are of interest, use
    subclass `SparseNodeVisitor` instead.  When (mostly or entirely) uniform
    processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional: ClassVar[tuple[str, ...]] = ('meta',)
    """
    Tuple containing node class names (as strings).

    No exception will be raised if writers do not implement visit
    or departure functions for these node classes.

    Used to ensure transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document: document, /) -> None:
        self.document: document = document

    def dispatch_visit(self, node: Node) -> None:
        """
        Call self."``visit_`` + node class name" with `node` as
        parameter.  If the ``visit_...`` method does not exist, call
        self.unknown_visit.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + node_name, self.unknown_visit)
        debug_message = (
            'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
            % (handler.__name__, node_name))
        self.document.reporter.debug(debug_message)
        return handler(node)

    def dispatch_departure(self, node: Node) -> None:
        """
        Call self."``depart_`` + node class name" with `node` as
        parameter.  If the ``depart_...`` method does not exist, call
        self.unknown_departure.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + node_name,
                          self.unknown_departure)
        debug_message = (
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, node_name))
        self.document.reporter.debug(debug_message)
        return handler(node)

    def unknown_visit(self, node: Node) -> None:
        """
        Called when entering unknown `Node` types.

        Raise an exception unless overridden.
        """
        # Only node classes listed in `optional` are tolerated, and only
        # if the "strict_visitor" setting is off.
        if (not self.document.settings.strict_visitor
                and node.__class__.__name__ in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, node.__class__.__name__))

    def unknown_departure(self, node: Node) -> None:
        """
        Called before exiting unknown `Node` types.

        Raise exception unless overridden.
        """
        # Only node classes listed in `optional` are tolerated, and only
        # if the "strict_visitor" setting is off.
        if (not self.document.settings.strict_visitor
                and node.__class__.__name__ in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, node.__class__.__name__))

2763

class SparseNodeVisitor(NodeVisitor):
    """
    Base class for sparse traversals, where only certain node types are of
    interest. When ``visit_...`` & ``depart_...`` methods should be
    implemented for *all* node types (such as for `docutils.writers.Writer`
    subclasses), subclass `NodeVisitor` instead.

    The do-nothing ``visit_...`` and ``depart_...`` methods are not written
    out in the class body; `_add_node_class_names()` assigns them for every
    name in `node_class_names`.
    """

2771

2772

class GenericNodeVisitor(NodeVisitor):
    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`, and
    each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`. `default_visit()` (and `default_departure()`) must
    be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only. Define semi-generic visitors by overriding
    individual ``visit_...()`` (and ``depart_...()``) methods also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.
    """

    def default_visit(self, node: Node):
        """Override for generic, uniform traversals."""
        # not implemented here: subclasses must provide the behaviour
        raise NotImplementedError

    def default_departure(self, node: Node):
        """Override for generic, uniform traversals."""
        # not implemented here: subclasses must provide the behaviour
        raise NotImplementedError

2797

2798

def _call_default_visit(self: GenericNodeVisitor, node: Node) -> None:
    """Forward `node` to the visitor's `default_visit()` method."""
    handler = self.default_visit
    handler(node)

2801

2802

def _call_default_departure(self: GenericNodeVisitor, node: Node) -> None:
    """Forward `node` to the visitor's `default_departure()` method."""
    handler = self.default_departure
    handler(node)

2805

2806

def _nop(self: SparseNodeVisitor, node: Node) -> None:
    """Do nothing; used as a ``visit_...``/``depart_...`` placeholder."""
    return None

2809

2810

def _add_node_class_names(names) -> None:
    """Attach default visitor methods for every node class name in `names`.

    For each name, `GenericNodeVisitor` gets ``visit_<name>`` and
    ``depart_<name>`` methods that delegate to `default_visit()` and
    `default_departure()`, while `SparseNodeVisitor` gets methods that
    do nothing.  Saves typing these definitions out by hand.
    """
    for node_name in names:
        visit_attr = 'visit_' + node_name
        depart_attr = 'depart_' + node_name
        setattr(GenericNodeVisitor, visit_attr, _call_default_visit)
        setattr(GenericNodeVisitor, depart_attr, _call_default_departure)
        setattr(SparseNodeVisitor, visit_attr, _nop)
        setattr(SparseNodeVisitor, depart_attr, _nop)

2818

2819

# Attach the default ``visit_...``/``depart_...`` methods for every name in
# `node_class_names` to `GenericNodeVisitor` and `SparseNodeVisitor`.
_add_node_class_names(node_class_names)

2821

2822

class TreeCopyVisitor(GenericNodeVisitor):
    """
    Make a complete copy of a tree or branch, including element attributes.

    Every visited node is copied with its ``copy()`` method and appended to
    the current acting parent; the copy then becomes the acting parent until
    the node is departed.  Fetch the result with `get_tree_copy()`.
    """

    def __init__(self, document: document) -> None:
        super().__init__(document)
        # Acting parent: starts out as a plain list collecting the top-level
        # copy, later replaced by the most recently copied node.
        self.parent: list[Node] = []
        # Acting parents saved by `default_visit()`, restored on departure.
        self.parent_stack: list[list[Node]] = []

    def get_tree_copy(self) -> Node:
        """Return the first item held by the current acting parent."""
        return self.parent[0]

    def default_visit(self, node: Node) -> None:
        """Copy the current node, and make the copy the new acting parent."""
        clone = node.copy()
        acting_parent = self.parent
        acting_parent.append(clone)
        self.parent_stack.append(acting_parent)
        self.parent = clone

    def default_departure(self, node: Node) -> None:
        """Restore the previous acting parent."""
        previous_parent = self.parent_stack.pop()
        self.parent = previous_parent

2846

2847

2848# Custom Exceptions

2849# =================

2850

class ValidationError(ValueError):
    """Invalid Docutils Document Tree Element.

    The offending element, if given, is stored in the
    `problematic_element` attribute (``None`` otherwise).
    """

    def __init__(self,
                 msg: str,
                 problematic_element: Element | None = None,
                 ) -> None:
        super().__init__(msg)
        self.problematic_element = problematic_element

2856

2857

class TreePruningException(Exception):
    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Raise a subclass from within a ``visit_...`` or ``depart_...`` method
    called from a `Node.walk()` or `Node.walkabout()` tree traversal to
    prune the tree traversed.
    """

2866

2867

class SkipChildren(TreePruningException):
    """
    Do not visit any children of the current node.

    The current node's siblings and its ``depart_...`` method are not
    affected.
    """

2873

2874

class SkipSiblings(TreePruningException):
    """
    Do not visit any more siblings (to the right) of the current node.

    The current node's children and its ``depart_...`` method are not
    affected.
    """

2880

2881

class SkipNode(TreePruningException):
    """
    Skip the rest of the current node.

    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """

2887

2888

class SkipDeparture(TreePruningException):
    """
    Do not call the current node's ``depart_...`` method.

    The current node's children and siblings are not affected.
    """

2894

2895

class NodeFound(TreePruningException):
    """
    Raise to indicate that the target of a search has been found.

    This exception must be caught by the client; it is not caught by the
    traversal code.
    """

2902

2903

class StopTraversal(TreePruningException):
    """
    Stop the traversal altogether.

    The current node's ``depart_...`` method is not affected. The parent
    nodes' ``depart_...`` methods are also called as usual. No other nodes
    are visited. This is an alternative to `NodeFound` that does not cause
    exception handling to trickle up to the caller.
    """

2912

2913

2914# definition moved here from `utils` to avoid circular import dependency

def unescape(text: str,
             restore_backslashes: bool = False,
             respect_whitespace: bool = False,
             ) -> str:
    """
    Return `text` with null characters removed or restored to backslashes.

    With `restore_backslashes` true, every null character is replaced by
    a backslash.  Otherwise null characters are dropped, and a space or
    newline directly following a null character (i.e. backslash-escaped
    whitespace) is dropped together with it.
    """
    # `respect_whitespace` is ignored (since introduction 2016-12-16)
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Order matters: escaped whitespace must go before the lone nulls.
    return (text.replace('\x00 ', '')
                .replace('\x00\n', '')
                .replace('\x00', ''))

2930

2931

def make_id(string: str) -> str:
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [¡-ÿ]
          escape      {unicode}|\\[ -~¡-ÿ]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters.  They should be replaced with hyphens ("-").  Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: https://www.w3.org/TR/html401
    .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1
    """
    ident = string.lower()
    # Replace special letters: digraphs first, then single characters.
    for table in (_non_id_translate_digraphs, _non_id_translate):
        ident = ident.translate(table)
    # get rid of non-ascii characters.
    # 'ascii' lowercase to prevent problems with turkish locale.
    decomposed = unicodedata.normalize('NFKD', ident)
    ident = decomposed.encode('ascii', 'ignore').decode('ascii')
    # shrink runs of whitespace and replace by hyphen
    collapsed = ' '.join(ident.split())
    ident = _non_id_chars.sub('-', collapsed)
    # strip leading hyphens/digits and trailing hyphens
    ident = _non_id_at_ends.sub('', ident)
    return str(ident)

2978

2979

# Helpers for `make_id()`.

# Runs of characters other than ASCII lowercase letters and digits;
# `make_id()` replaces each run with a single hyphen.
_non_id_chars: re.Pattern[str] = re.compile('[^a-z0-9]+')
# Leading hyphens/digits and trailing hyphens; stripped by `make_id()`.
_non_id_at_ends: re.Pattern[str] = re.compile('^[-0-9]+|-+$')
# Code point -> ASCII letter, applied with `str.translate()` in `make_id()`
# before the NFKD normalization step.
# NOTE(review): presumably these letters have no NFKD decomposition to a
# plain ASCII base letter -- confirm against the Unicode data.
_non_id_translate: dict[int, str] = {
    0x00f8: 'o',  # o with stroke
    0x0111: 'd',  # d with stroke
    0x0127: 'h',  # h with stroke
    0x0131: 'i',  # dotless i
    0x0142: 'l',  # l with stroke
    0x0167: 't',  # t with stroke
    0x0180: 'b',  # b with stroke
    0x0183: 'b',  # b with topbar
    0x0188: 'c',  # c with hook
    0x018c: 'd',  # d with topbar
    0x0192: 'f',  # f with hook
    0x0199: 'k',  # k with hook
    0x019a: 'l',  # l with bar
    0x019e: 'n',  # n with long right leg
    0x01a5: 'p',  # p with hook
    0x01ab: 't',  # t with palatal hook
    0x01ad: 't',  # t with hook
    0x01b4: 'y',  # y with hook
    0x01b6: 'z',  # z with stroke
    0x01e5: 'g',  # g with stroke
    0x0225: 'z',  # z with hook
    0x0234: 'l',  # l with curl
    0x0235: 'n',  # n with curl
    0x0236: 't',  # t with curl
    0x0237: 'j',  # dotless j
    0x023c: 'c',  # c with stroke
    0x023f: 's',  # s with swash tail
    0x0240: 'z',  # z with swash tail
    0x0247: 'e',  # e with stroke
    0x0249: 'j',  # j with stroke
    0x024b: 'q',  # q with hook tail
    0x024d: 'r',  # r with stroke
    0x024f: 'y',  # y with stroke
}
# Code point -> two-letter ASCII replacement; applied in `make_id()`
# before `_non_id_translate`.
_non_id_translate_digraphs: dict[int, str] = {
    0x00df: 'sz',  # ligature sz
    0x00e6: 'ae',  # ae
    0x0153: 'oe',  # ligature oe
    0x0238: 'db',  # db digraph
    0x0239: 'qp',  # qp digraph
}

3024

3025

def dupname(node: Element, name: str) -> None:
    """Mark `name` as a duplicate name of `node`.

    Append `name` to ``node['dupnames']``, remove it from
    ``node['names']``, and flag the node as referenced.
    """
    duplicates = node['dupnames']
    duplicates.append(name)
    active_names = node['names']
    active_names.remove(name)
    # Assume that `node` is referenced, even though it isn't;
    # we don't want to throw unnecessary system_messages.
    node.referenced = True

3032

3033

def fully_normalize_name(name: str) -> str:
    """Return `name` lowercased, with whitespace runs collapsed to a space.

    Leading and trailing whitespace is removed.
    """
    words = name.lower().split()
    return ' '.join(words)

3037

3038

def whitespace_normalize_name(name: str) -> str:
    """Return `name` with whitespace runs collapsed to a single space.

    Leading and trailing whitespace is removed; case is preserved.
    """
    words = name.split()
    return ' '.join(words)

3042

3043

def serial_escape(value: str) -> str:
    """Escape string values that are elements of a list, for serialization.

    Backslashes are doubled first, then every space gets a backslash
    prepended.
    """
    backslashes_doubled = value.replace('\\', '\\\\')
    return backslashes_doubled.replace(' ', '\\ ')

3047

3048

def split_name_list(s: str) -> list[str]:
    r"""Split a string at non-escaped spaces.

    Backslashes escape internal spaces (cf. `serial_escape()`).
    Return a list of "names" (after removing escaping backslashes).

    >>> split_name_list(r'a\ n\ame two\\ n\\ames')
    ['a name', 'two\\', 'n\\ames']

    Provisional.
    """
    masked = (s.replace('\\', '\x00')           # escape with NULL char
               .replace('\x00\x00', '\\')       # unescape backslashes
               .replace('\x00 ', '\x00\x00'))   # escaped spaces -> NULL NULL
    # restore internal spaces, drop other escaping characters
    restored = (part.replace('\x00\x00', ' ') for part in masked.split(' '))
    return [part.replace('\x00', '') for part in restored]

3067

3068

def pseudo_quoteattr(value: str) -> str:
    """Quote attributes for pseudo-xml.

    Wrap `value` in double quotes; the content is not escaped.
    """
    return f'"{value}"'

3072

3073

def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'
                  ) -> tuple[int|float, str]:
    """Parse a measure__, return value + unit.

    `unit_pattern` is a regular expression describing recognized units.
    The default is suited for (but not limited to) CSS3 units and SI units.
    It matches runs of ASCII letters or Greek mu, a single percent sign,
    or no unit.

    The value is returned as `int` if the number converts to one, else
    as `float`.  Raise `ValueError` if `measure` does not consist of a
    number, optional spaces, and a unit matching `unit_pattern`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure

    Provisional.
    """
    match = re.fullmatch(f'(-?[0-9.]+) *({unit_pattern})', measure)
    if match is None:
        raise ValueError(f'"{measure}" is no valid measure.')
    number, unit = match.group(1), match.group(2)
    # Prefer `int`; fall back to `float` for numbers with a decimal point.
    for convert in (int, float):
        try:
            return convert(number), unit
        except ValueError:
            continue
    # e.g. "1.2.3" or ".": matched the number pattern but is no number
    raise ValueError(f'"{measure}" is no valid measure.')

3097

3098

3099# Methods to validate `Element attribute`__ values.

3100

3101# Ensure the expected Python `data type`__, normalize, and check for

3102# restrictions.

3103#

3104# The methods can be used to convert `str` values (eg. from an XML

3105# representation) or to validate an existing document tree or node.

3106#

3107# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,

3108# and the `attribute_validating_functions` mapping below.

3109#

3110# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference

3111# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types

3112

def create_keyword_validator(*keywords: str) -> Callable[[str], str]:
    """
    Return a function that validates a `str` against given `keywords`.

    The returned function returns its argument unchanged if it is one of
    `keywords` and raises `ValueError` otherwise.

    Provisional.
    """
    def validate_keywords(value: str) -> str:
        if value in keywords:
            return value
        allowed = '", "'.join(keywords)
        raise ValueError(f'"{value}" is not one of "{allowed}".')

    return validate_keywords

3125

3126

def validate_identifier(value: str) -> str:
    """
    Validate identifier key or class name.

    Return `value` unchanged if `make_id()` leaves it unchanged;
    raise `ValueError` otherwise.

    Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type

    Provisional.
    """
    if make_id(value) == value:
        return value
    raise ValueError(f'"{value}" is no valid id or class name.')

3140

3141

def validate_identifier_list(value: str | list[str]) -> list[str]:
    """
    A (space-separated) list of ids or class names.

    `value` may be a `list` or a `str` with space separated
    ids or class names (cf. `validate_identifier()`).
    A `str` is split at whitespace; a `list` is returned as passed in.
    Raise `ValueError` if any item is no valid identifier.

    Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_identifier(token)
    return tokens

3162

3163

def validate_measure(measure: str) -> str:
    """
    Validate a measure__ (number + optional unit). Return normalized `str`.

    The result is the parsed number directly followed by the unit, i.e.
    spaces between number and unit are dropped.

    See `parse_measure()` for a function returning a "number + unit" tuple.

    The unit may be a run of ASCII letters or Greek mu, a single percent sign,
    or the empty string. Case is preserved.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
    """
    number, unit = parse_measure(measure)
    return str(number) + unit

3179

3180

def validate_colwidth(measure: str|int|float) -> int|float:
    """Validate the "colwidth__" attribute.

    Numbers are taken as they are, the strings "*" and "" stand for 1,
    and other strings are parsed as a number with optional unit "*".
    Return the numerical value; raise `ValueError` if `measure` cannot be
    parsed or the value is not positive.

    Provisional:
    `measure` must be a `str` and will be returned as normalized `str`
    (with unit "*" for proportional values) in Docutils 1.0.

    The default unit will change to "pt" in Docutils 2.0.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
    """
    if isinstance(measure, (int, float)):
        width = measure
    elif measure in ('*', ''):  # short for '1*'
        width = 1
    else:
        try:
            width = parse_measure(measure, unit_pattern='[*]?')[0]
        except ValueError:
            # use an invalid width to trigger the error below
            width = -1
    if width <= 0:
        raise ValueError(f'"{measure}" is no proportional measure.')
    return width

3204

3205

def validate_NMTOKEN(value: str) -> str:
    """
    Validate a "name token": a `str` of ASCII letters, digits, and [-._].

    Return `value` unchanged; raise `ValueError` if it is empty or
    contains any other character.

    Provisional.
    """
    if re.fullmatch('[-._A-Za-z0-9]+', value) is None:
        raise ValueError(f'"{value}" is no NMTOKEN.')
    return value

3215

3216

def validate_NMTOKENS(value: str | list[str]) -> list[str]:
    """
    Validate a list of "name tokens".

    A `str` is split at whitespace; a `list` is returned as passed in.
    Raise `ValueError` if any item fails `validate_NMTOKEN()`.

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_NMTOKEN(token)
    return tokens

3228

3229

def validate_refname_list(value: str | list[str]) -> list[str]:
    """
    Validate a list of `reference names`__.

    Reference names may contain all characters;
    whitespace is normalized (cf. `whitespace_normalize_name()`).

    `value` may be either a `list` of names or a `str` with
    space separated names (with internal spaces backslash escaped
    and literal backslashes doubled, cf. `serial_escape()`).

    Return a list of whitespace-normalized, unescaped reference names.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name
    """
    names = split_name_list(value) if isinstance(value, str) else value
    return list(map(whitespace_normalize_name, names))

3250

3251

def validate_yesorno(value: str | int | bool) -> bool:
    """Validate a `%yesorno`__ (flag) value.

    The string literal "0" evaluates to ``False``, all other
    values are converted with `bool()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#yesorno
    """
    return False if value == "0" else bool(value)

3263

3264

# Each value is a callable taking the attribute value and returning the
# converted/normalized value; the validators defined in this module raise
# `ValueError` for invalid input.  Plain `str`/`int` are used where only a
# type conversion is wanted.
ATTRIBUTE_VALIDATORS: dict[str, Callable[[str], Any]] = {
    'alt': str,  # CDATA
    'align': str,
    'anonymous': validate_yesorno,
    'auto': str,  # CDATA (only '1' or '*' are used in rST)
    'backrefs': validate_identifier_list,
    'bullet': str,  # CDATA (only '-', '+', or '*' are used in rST)
    'classes': validate_identifier_list,
    'char': str,  # from Exchange Table Model (CALS), currently ignored
    'charoff': validate_NMTOKEN,  # from CALS, currently ignored
    'colname': validate_NMTOKEN,  # from CALS, currently ignored
    'colnum': int,  # from CALS, currently ignored
    'cols': int,  # from CALS: "NMTOKEN, […] must be an integer > 0".
    'colsep': validate_yesorno,
    'colwidth': validate_colwidth,  # see docstring for pending changes
    'content': str,  # <meta>
    'delimiter': str,
    'dir': create_keyword_validator('ltr', 'rtl', 'auto'),  # <meta>
    'dupnames': validate_refname_list,
    'enumtype': create_keyword_validator('arabic', 'loweralpha', 'lowerroman',
                                         'upperalpha', 'upperroman'),
    'format': str,  # CDATA (space separated format names)
    'frame': create_keyword_validator('top', 'bottom', 'topbot', 'all',
                                      'sides', 'none'),  # from CALS, ignored
    'height': validate_measure,
    'http-equiv': str,  # <meta>
    'ids': validate_identifier_list,
    'lang': str,  # <meta>
    'level': int,
    'line': int,
    'ltrim': validate_yesorno,
    'loading': create_keyword_validator('embed', 'link', 'lazy'),
    'media': str,  # <meta>
    'morecols': int,
    'morerows': int,
    'name': whitespace_normalize_name,  # in <reference> (deprecated)
    # 'name': node_attributes.validate_NMTOKEN,  # in <meta>
    'names': validate_refname_list,
    'namest': validate_NMTOKEN,  # start of span, from CALS, currently ignored
    'nameend': validate_NMTOKEN,  # end of span, from CALS, currently ignored
    'pgwide': validate_yesorno,  # from CALS, currently ignored
    'prefix': str,
    'refid': validate_identifier,
    'refname': whitespace_normalize_name,
    'refuri': str,
    'rowsep': validate_yesorno,
    'rtrim': validate_yesorno,
    'scale': int,
    'scheme': str,
    'source': str,
    'start': int,
    'stub': validate_yesorno,
    'suffix': str,
    'title': str,
    'type': validate_NMTOKEN,
    'uri': str,
    'valign': create_keyword_validator('top', 'middle', 'bottom'),  # from CALS
    'width': validate_measure,
    'xml:space': create_keyword_validator('default', 'preserve'),
    }
"""
Mapping of `attribute names`__ to validating functions.

Provisional.

__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
"""