Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/parsers/rst/states.py: 92%

1# $Id: states.py 10351 2026-06-11 21:51:21Z milde $

2# Author: David Goodger <goodger@python.org>

3# Copyright: This module has been placed in the public domain.

5"""

6This is the ``docutils.parsers.rst.states`` module, the core of

7the reStructuredText parser. It defines the following:

9:Classes:

10 - `RSTStateMachine`: reStructuredText parser's entry point.

11 - `NestedStateMachine`: recursive StateMachine.

12 - `RSTState`: reStructuredText State superclass.

13 - `Inliner`: For parsing inline markup.

14 - `Body`: Generic classifier of the first line of a block.

15 - `SpecializedBody`: Superclass for compound element members.

16 - `BulletList`: Second and subsequent bullet_list list_items

17 - `DefinitionList`: Second+ definition_list_items.

18 - `EnumeratedList`: Second+ enumerated_list list_items.

19 - `FieldList`: Second+ fields.

20 - `OptionList`: Second+ option_list_items.

21 - `RFC2822List`: Second+ RFC2822-style fields.

22 - `ExtensionOptions`: Parses directive option fields.

23 - `Explicit`: Second+ explicit markup constructs.

24 - `SubstitutionDef`: For embedded directives in substitution definitions.

25 - `Text`: Classifier of second line of a text block.

26 - `SpecializedText`: Superclass for continuation lines of Text-variants.

27 - `Definition`: Second line of potential definition_list_item.

28 - `Line`: Second line of overlined section title or transition marker.

29 - `Struct`: obsolete, use `types.SimpleNamespace`.

31:Exception classes:

32 - `MarkupError`

33 - `ParserError`

34 - `MarkupMismatch`

36:Functions:

37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.

38 - `unescape()`: Return a string, nulls removed or restored to backslashes.

40:Attributes:

41 - `state_classes`: set of State classes used with `RSTStateMachine`.

43Parser Overview

44===============

46The reStructuredText parser is implemented as a recursive state machine,

47examining its input one line at a time. To understand how the parser works,

48please first become familiar with the `docutils.statemachine` module. In the

49description below, references are made to classes defined in this module;

50please see the individual classes for details.

52Parsing proceeds as follows:

541. The state machine examines each line of input, checking each of the

55 transition patterns of the state `Body`, in order, looking for a match.

56 The implicit transitions (blank lines and indentation) are checked before

57 any others. The 'text' transition is a catch-all (matches anything).

592. The method associated with the matched transition pattern is called.

61 A. Some transition methods are self-contained, appending elements to the

62 document tree (`Body.doctest` parses a doctest block). The parser's

63 current line index is advanced to the end of the element, and parsing

64 continues with step 1.

66 B. Other transition methods trigger the creation of a nested state machine,

67 whose job is to parse a compound construct ('indent' does a block quote,

68 'bullet' does a bullet list, 'overline' does a section [first checking

69 for a valid section header], etc.).

71 - In the case of lists and explicit markup, a one-off state machine is

72 created and run to parse contents of the first item.

74 - A new state machine is created and its initial state is set to the

75 appropriate specialized state (`BulletList` in the case of the

76 'bullet' transition; see `SpecializedBody` for more detail). This

77 state machine is run to parse the compound element (or series of

78 explicit markup elements), and returns as soon as a non-member element

79 is encountered. For example, the `BulletList` state machine ends as

80 soon as it encounters an element which is not a list item of that

81 bullet list. The optional omission of inter-element blank lines is

82 enabled by this nested state machine.

84 - The current line index is advanced to the end of the elements parsed,

85 and parsing continues with step 1.

87 C. The result of the 'text' transition depends on the next line of text.

88 The current state is changed to `Text`, under which the second line is

89 examined. If the second line is:

91 - Indented: The element is a definition list item, and parsing proceeds

92 similarly to step 2.B, using the `DefinitionList` state.

94 - A line of uniform punctuation characters: The element is a section

95 header; again, parsing proceeds as in step 2.B, and `Body` is still

96 used.

98 - Anything else: The element is a paragraph, which is examined for

99 inline markup and appended to the parent element. Processing

100 continues with step 1.

101"""

102

103from __future__ import annotations

104

105__docformat__ = 'reStructuredText'

106

107import re

108from types import FunctionType, MethodType

109from types import SimpleNamespace as Struct

110import warnings

111

112from docutils import nodes, statemachine, utils

113from docutils import ApplicationError, DataError

114from docutils.statemachine import StateMachineWS, StateWS

115from docutils.nodes import fully_normalize_name as normalize_name

116from docutils.nodes import unescape, whitespace_normalize_name

117import docutils.parsers.rst

118from docutils.parsers.rst import directives, languages, tableparser, roles

119from docutils.utils import escape2null, column_width, strip_combining_chars

120from docutils.utils import punctuation_chars, urischemes

121from docutils.utils import split_escaped_whitespace

122from docutils.utils._roman_numerals import (InvalidRomanNumeralError,

123 RomanNumeral)

124

125TYPE_CHECKING = False

126if TYPE_CHECKING:

127 from docutils.statemachine import StringList

128

129

# Exception classes of the reStructuredText parser.
# The first three derive from `DataError`, `ParserError` derives from
# `ApplicationError`, and `MarkupMismatch` is a plain `Exception`.

class MarkupError(DataError):
    pass


class UnknownInterpretedRoleError(DataError):
    pass


class InterpretedRoleNotImplementedError(DataError):
    pass


class ParserError(ApplicationError):
    pass


class MarkupMismatch(Exception):
    pass

135

136

class RSTStateMachine(StateMachineWS):

    """
    Master StateMachine of the reStructuredText parser.

    Parsing is started by calling the `run()` method.
    """

    section_level_offset: int = 0
    """Correction term for section level determination in nested parsing.

    Updated by `RSTState.nested_parse()` and used in
    `RSTState.check_subsection()` to compensate differences when
    nested parsing uses a detached base node with a document-wide
    section title style hierarchy or the current node with a new,
    independent title style hierarchy.
    """

    def run(self, input_lines, document, input_offset=0, match_titles=True,
            inliner=None) -> None:
        """
        Parse `input_lines`, modifying the `document` node in place.

        Extends `StateMachineWS.run()`: the parse-global data is set up
        first, then the StateMachine is run.

        :input_lines: the source lines to parse.
        :document: root node; also provides `settings`, `reporter`,
            `note_source` and the ``'source'`` attribute used here.
        :input_offset: passed on to `StateMachineWS.run()`.
        :match_titles: stored as `self.match_titles`.
        :inliner: inline-markup parser; a new `Inliner` is created
            if this is None.
        """
        settings = document.settings
        self.language = languages.get_language(settings.language_code,
                                               document.reporter)
        self.match_titles = match_titles
        if inliner is None:
            inliner = Inliner()
        # customizations are (re-)initialized for passed-in inliners, too
        inliner.init_customizations(settings)
        # Objects shared with nested parsers.
        # The entries `reporter`, `section_level`, and
        # `section_bubble_up_kludge` will be removed in Docutils 2.0
        shared = dict(document=document,
                      reporter=document.reporter,  # ignored
                      language=self.language,
                      title_styles=[],
                      section_level=0,  # ignored
                      section_bubble_up_kludge=False,  # ignored
                      inliner=inliner)
        self.memo = Struct(**shared)
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = self.document.reporter
        self.node = document
        leftovers = StateMachineWS.run(self, input_lines, input_offset,
                                       input_source=document['source'])
        assert leftovers == [], (
            'RSTStateMachine.run() results should be empty!')
        # drop references that are no longer needed
        self.node = None
        self.memo = None

186

187

class NestedStateMachine(RSTStateMachine):
    """
    StateMachine for nested document structures.

    It is run from within the run of another StateMachine.
    """

    def __init__(self, state_classes, initial_state,
                 debug=False, parent_state_machine=None) -> None:
        """Store the parent state machine, then initialize the base class."""
        self.parent_state_machine = parent_state_machine
        """The instance of the parent state machine."""

        super().__init__(state_classes, initial_state, debug)

    def run(self, input_lines, input_offset, memo, node, match_titles=True):
        """
        Parse `input_lines`, adding the result to `node`.

        Extends `StateMachineWS.run()`: the document-wide data is taken
        from `memo` before the StateMachine is run.

        Return the (empty) result list of `StateMachineWS.run()`.
        """
        document = memo.document
        self.match_titles = match_titles
        self.memo = memo
        self.document = document
        self.attach_observer(document.note_source)
        self.language = memo.language
        self.reporter = document.reporter
        self.node = node
        leftovers = StateMachineWS.run(self, input_lines, input_offset)
        assert leftovers == [], ('NestedStateMachine.run() results should be '
                                 'empty!')
        return leftovers

219

220

221class RSTState(StateWS):

222

223 """

224 reStructuredText State superclass.

225

226 Contains methods used by all State subclasses.

227 """

228

229 nested_sm = NestedStateMachine

230 nested_sm_cache = []

231

232 def __init__(self, state_machine: RSTStateMachine, debug=False) -> None:

233 self.nested_sm_kwargs = {'state_classes': state_classes,

234 'initial_state': 'Body'}

235 StateWS.__init__(self, state_machine, debug)

236

237 def runtime_init(self) -> None:

238 StateWS.runtime_init(self)

239 memo = self.state_machine.memo

240 self.memo = memo

241 self.document = memo.document

242 self.inliner = memo.inliner

243 self.reporter = self.document.reporter

244 # enable the reporter to determine source and source-line

245 if not hasattr(self.reporter, 'get_source_and_line'):

246 self.reporter.get_source_and_line = self.state_machine.get_source_and_line # noqa:E501

247

248 @property

249 def parent(self) -> nodes.Element | None:

250 return self.state_machine.node

251

252 @parent.setter

253 def parent(self, value: nodes.Element):

254 self.state_machine.node = value

255

256 def goto_line(self, abs_line_offset) -> None:

257 """

258 Jump to input line `abs_line_offset`, ignoring jumps past the end.

259 """

260 try:

261 self.state_machine.goto_line(abs_line_offset)

262 except EOFError:

263 pass

264

265 def no_match(self, context, transitions):

266 """

267 Override `StateWS.no_match` to generate a system message.

268

269 This code should never be run.

270 """

271 self.reporter.severe(

272 'Internal error: no transition pattern match. State: "%s"; '

273 'transitions: %s; context: %s; current line: %r.'

274 % (self.__class__.__name__, transitions, context,

275 self.state_machine.line))

276 return context, None, []

277

278 def bof(self, context):

279 """Called at beginning of file."""

280 return [], []

281

    def nested_parse(self,
                     block: StringList,
                     input_offset: int,
                     node: nodes.Element|None = None,
                     match_titles: bool = False,
                     state_machine_class: StateMachineWS|None = None,
                     state_machine_kwargs: dict|None = None
                     ) -> int:
        """
        Parse the input `block` with a nested state-machine rooted at `node`.

        :block:
            reStructuredText source extract.
        :input_offset:
            Line number at start of the block.
        :node:
            Base node. Generated nodes will be appended to this node.
            Default: the "current node" (`self.state_machine.node`).
        :match_titles:
            Allow section titles?
            Caution: With a custom base node, this may lead to an invalid
            or mixed up document tree. [#]_
        :state_machine_class:
            Default: `NestedStateMachine`.
        :state_machine_kwargs:
            Keyword arguments for the state-machine instantiation.
            Default: `self.nested_sm_kwargs`.

        Create a new state-machine instance if required.
        State machines are only taken from and returned to
        `self.nested_sm_cache` when both `state_machine_class` and
        `state_machine_kwargs` are left at their defaults.
        Return new offset.

        .. [#] See also ``test_parsers/test_rst/test_nested_parsing.py``
               and Sphinx's `nested_parse_to_nodes()`__.

        __ https://www.sphinx-doc.org/en/master/extdev/utils.html
           #sphinx.util.parsing.nested_parse_to_nodes
        """
        if node is None:
            node = self.state_machine.node
        # Count how many of (class, kwargs) fall back to the defaults;
        # the cache is used only if both do (use_default == 2).
        use_default = 0
        if state_machine_class is None:
            state_machine_class = self.nested_sm
            use_default += 1
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
            use_default += 1
        my_state_machine = None
        if use_default == 2:
            try:
                # get cached state machine, prevent others from using it
                my_state_machine = self.nested_sm_cache.pop()
            except IndexError:
                pass  # cache is empty, a new instance is created below
        if not my_state_machine:
            my_state_machine = state_machine_class(
                debug=self.debug,
                parent_state_machine=self.state_machine,
                **state_machine_kwargs)
        # Check if we may use sections (with a caveat for custom nodes
        # that may be dummies to collect children):
        if (node == self.state_machine.node
            and not isinstance(node, (nodes.document, nodes.section))):
            match_titles = False  # avoid invalid sections
        if match_titles:
            # Compensate mismatch of known title styles and number of
            # parent sections of the base node if the document wide
            # title styles are used with a detached base node or
            # a new list of title styles with the current parent node:
            l_node = len(node.section_hierarchy())
            l_start = min(len(self.parent.section_hierarchy()),
                          len(self.memo.title_styles))
            my_state_machine.section_level_offset = l_start - l_node

        # run the state machine and populate `node`:
        block_length = len(block)  # remembered to detect changes of `block`
        my_state_machine.run(block, input_offset, self.memo,
                             node, match_titles)

        if match_titles:
            if node == self.state_machine.node:
                # Pass on the new "current node" to parent state machines:
                sm = self.state_machine
                try:
                    # The loop walks up the `parent_state_machine` chain
                    # and is ended by the AttributeError raised when the
                    # chain runs out.
                    while True:
                        sm.node = my_state_machine.node
                        sm = sm.parent_state_machine
                except AttributeError:
                    pass
        # clean up
        new_offset = my_state_machine.abs_line_offset()
        if use_default == 2:
            # default machines are recycled via the cache
            self.nested_sm_cache.append(my_state_machine)
        else:
            my_state_machine.unlink()
        # No `block.parent` implies disconnected -- lines aren't in sync:
        if block.parent and (len(block) - block_length) != 0:
            # Adjustment for block if modified in nested parse:
            self.state_machine.next_line(len(block) - block_length)
        return new_offset

381

382 def nested_list_parse(self, block, input_offset, node, initial_state,

383 blank_finish,

384 blank_finish_state=None,

385 extra_settings={},

386 match_titles=False, # deprecated, will be removed

387 state_machine_class=None,

388 state_machine_kwargs=None):

389 """

390 Parse the input `block` with a nested state-machine rooted at `node`.

391

392 Create a new StateMachine rooted at `node` and run it over the

393 input `block` (see also `nested_parse()`).

394 Also keep track of optional intermediate blank lines and the

395 required final one.

396

397 Return new offset and a boolean indicating whether there was a

398 blank final line.

399 """

400 if match_titles:

401 warnings.warn('The "match_titles" argument of '

402 'parsers.rst.states.RSTState.nested_list_parse() '

403 'will be ignored in Docutils 1.0 '

404 'and removed in Docutils 2.0.',

405 PendingDeprecationWarning, stacklevel=2)

406 if state_machine_class is None:

407 state_machine_class = self.nested_sm

408 if state_machine_kwargs is None:

409 state_machine_kwargs = self.nested_sm_kwargs.copy()

410 state_machine_kwargs['initial_state'] = initial_state

411 my_state_machine = state_machine_class(

412 debug=self.debug,

413 parent_state_machine=self.state_machine,

414 **state_machine_kwargs)

415 if blank_finish_state is None:

416 blank_finish_state = initial_state

417 my_state_machine.states[blank_finish_state].blank_finish = blank_finish

418 for key, value in extra_settings.items():

419 setattr(my_state_machine.states[initial_state], key, value)

420 my_state_machine.run(block, input_offset, memo=self.memo,

421 node=node, match_titles=match_titles)

422 blank_finish = my_state_machine.states[blank_finish_state].blank_finish

423 my_state_machine.unlink()

424 return my_state_machine.abs_line_offset(), blank_finish

425

426 def section(self, title, source, style, lineno, messages) -> None:

427 """Check for a valid subsection and create one if it checks out."""

428 if self.check_subsection(source, style, lineno):

429 self.new_subsection(title, lineno, messages)

430

    def check_subsection(self, source, style, lineno) -> bool:
        """
        Check for a valid subsection header. Update section data in `memo`.

        When a new section is reached that isn't a subsection of the current
        section, set `self.parent` to the new section's parent section
        (or the root node if the new section is a top-level section).

        :source: source text of the title, shown in error messages.
        :style: title style; looked up in (and, if new, appended to)
            `self.memo.title_styles`.
        :lineno: line number used for error messages.

        Return True if the section may be created. Otherwise append an
        error message to `self.parent` and return False.
        """
        title_styles = self.memo.title_styles
        parent_sections = self.parent.section_hierarchy()
        # current section level: (0 root, 1 section, 2 subsection, ...)
        oldlevel = (len(parent_sections)
                    + self.state_machine.section_level_offset)
        # new section level:
        try:  # check for existing title style
            newlevel = title_styles.index(style) + 1
        except ValueError:  # new title style
            newlevel = len(title_styles) + 1
        # The new level must not be deeper than an immediate child
        # of the current level:
        if newlevel > oldlevel + 1:
            styles = ' '.join('/'.join(style) for style in title_styles)
            self.parent += self.reporter.error(
                'Inconsistent title style:'
                f' skip from level {oldlevel} to {newlevel}.',
                nodes.literal_block('', source),
                nodes.paragraph('', f'Established title styles: {styles}'),
                line=lineno)
            return False
        if newlevel <= oldlevel:
            # new section is sibling or higher up in the section hierarchy
            try:
                # The index is negative here (at most -1): it counts
                # back from the end of `parent_sections`.
                new_parent = parent_sections[newlevel-oldlevel-1].parent
            except IndexError:
                # fewer parent sections than the level difference requires
                styles = ' '.join('/'.join(style) for style in title_styles)
                details = (f'The parent of level {newlevel} sections cannot'
                           ' be reached. The parser is at section level'
                           f' {oldlevel} but the current node has only'
                           f' {len(parent_sections)} parent section(s).'
                           '\nOne reason may be a high level'
                           ' section used in a directive that parses its'
                           ' content into a base node not attached to'
                           ' the document\n(up to Docutils 0.21,'
                           ' these sections were silently dropped).')
                self.parent += self.reporter.error(
                    f'A level {newlevel} section cannot be used here.',
                    nodes.literal_block('', source),
                    nodes.paragraph('', f'Established title styles: {styles}'),
                    nodes.paragraph('', details),
                    line=lineno)
                return False
            self.parent = new_parent
            self.memo.section_level = newlevel - 1
        if newlevel > len(title_styles):
            # register the title style of a new, deeper level
            title_styles.append(style)
        return True

487

488 def title_inconsistent(self, sourcetext, lineno):

489 # Ignored. Will be removed in Docutils 2.0.

490 error = self.reporter.error(

491 'Title level inconsistent:', nodes.literal_block('', sourcetext),

492 line=lineno)

493 return error

494

495 def new_subsection(self, title, lineno, messages):

496 """Append new subsection to document tree."""

497 section_node = nodes.section()

498 self.parent += section_node

499 textnodes, title_messages = self.inline_text(title, lineno)

500 titlenode = nodes.title(title, '', *textnodes)

501 name = normalize_name(titlenode.astext())

502 section_node['names'].append(name)

503 section_node += titlenode

504 section_node += messages

505 section_node += title_messages

506 self.document.note_implicit_target(section_node, section_node)

507 # Update state:

508 self.parent = section_node

509 self.memo.section_level += 1

510

511 def paragraph(self, lines, lineno):

512 """

513 Return a list (paragraph & messages) & a boolean: literal_block next?

514 """

515 data = '\n'.join(lines).rstrip()

516 if re.search(r'(?<!\\)(\\\\)*::$', data):

517 if len(data) == 2:

518 return [], 1

519 elif data[-3] in ' \n':

520 text = data[:-3].rstrip()

521 else:

522 text = data[:-1]

523 literalnext = 1

524 else:

525 text = data

526 literalnext = 0

527 textnodes, messages = self.inline_text(text, lineno)

528 p = nodes.paragraph(data, '', *textnodes)

529 p.source, p.line = self.state_machine.get_source_and_line(lineno)

530 return [p] + messages, literalnext

531

532 def inline_text(self, text, lineno):

533 """

534 Return 2 lists: nodes (text and inline elements), and system_messages.

535 """

536 nodes, messages = self.inliner.parse(text, lineno,

537 self.memo, self.parent)

538 return nodes, messages

539

540 def unindent_warning(self, node_name):

541 # the actual problem is one line below the current line

542 lineno = self.state_machine.abs_line_number() + 1

543 return self.reporter.warning('%s ends without a blank line; '

544 'unexpected unindent.' % node_name,

545 line=lineno)

546

547

def build_regexp(definition, compile_patterns=True):
    """
    Build a regular expression from `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
                where "parts" is a list of regular expressions and/or regular
                expression definitions (nested 4-tuples) to be joined into
                an or-group.

    Return the compiled pattern if `compile_patterns` is true,
    else the pattern string.
    """
    name, prefix, suffix, parts = definition
    # nested definitions are expanded to (uncompiled) pattern strings
    alternatives = '|'.join(
        build_regexp(part, None) if isinstance(part, tuple) else part
        for part in parts)
    regexp = f'{prefix}(?P<{name}>{alternatives}){suffix}'
    if not compile_patterns:
        return regexp
    return re.compile(regexp)

569

570

571class Inliner:

572

573 """

574 Parse inline markup; call the `parse()` method.

575 """

576

577 def __init__(self) -> None:

578 self.implicit_dispatch = []

579 """List of (pattern, bound method) tuples, used by

580 `self.implicit_inline`."""

581

582 def init_customizations(self, settings) -> None:

583 # lookahead and look-behind expressions for inline markup rules

584 if getattr(settings, 'character_level_inline_markup', False):

585 start_string_prefix = '(^|(?<!\x00))'

586 end_string_suffix = ''

587 else:

588 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %

589 (punctuation_chars.openers,

590 punctuation_chars.delimiters))

591 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %

592 (punctuation_chars.closing_delimiters,

593 punctuation_chars.delimiters,

594 punctuation_chars.closers))

595 args = locals().copy()

596 args.update(vars(self.__class__))

597

598 parts = ('initial_inline', start_string_prefix, '',

599 [

600 ('start', '', self.non_whitespace_after, # simple start-strings

601 [r'\*\*', # strong

602 r'\*(?!\*)', # emphasis but not strong

603 r'``', # literal

604 r'_`', # inline internal target

605 r'\|(?!\|)'] # substitution reference

606 ),

607 ('whole', '', end_string_suffix, # whole constructs

608 [ # reference name & end-string

609 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,

610 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',

611 [r'[0-9]+', # manually numbered

612 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)

613 r'\*', # auto-symbol

614 r'(?P<citationlabel>%s)' % self.simplename, # citation ref

615 ]

616 )

617 ]

618 ),

619 ('backquote', # interpreted text or phrase reference

620 '(?P<role>(:%s:)?)' % self.simplename, # optional role

621 self.non_whitespace_after,

622 ['`(?!`)'] # but not literal

623 )

624 ]

625 )

626 self.start_string_prefix = start_string_prefix

627 self.end_string_suffix = end_string_suffix

628 self.parts = parts

629

630 self.patterns = Struct(

631 initial=build_regexp(parts),

632 emphasis=re.compile(self.non_whitespace_escape_before

633 + r'(\*)' + end_string_suffix),

634 strong=re.compile(self.non_whitespace_escape_before

635 + r'(\*\*)' + end_string_suffix),

636 interpreted_or_phrase_ref=re.compile(

637 r"""

638 %(non_unescaped_whitespace_escape_before)s

639 (

640 `

641 (?P<suffix>

642 (?P<role>:%(simplename)s:)?

643 (?P<refend>__?)?

644 )

645 )

646 %(end_string_suffix)s

647 """ % args, re.VERBOSE),

648 embedded_link=re.compile(

649 r"""

650 (

651 (?:[ \n]+|^) # spaces or beginning of line/string

652 < # open bracket

653 %(non_whitespace_after)s

654 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets

655 %(non_whitespace_escape_before)s

656 > # close bracket

657 )

658 $ # end of string

659 """ % args, re.VERBOSE),

660 literal=re.compile(self.non_whitespace_before + '(``)'

661 + end_string_suffix),

662 target=re.compile(self.non_whitespace_escape_before

663 + r'(`)' + end_string_suffix),

664 substitution_ref=re.compile(self.non_whitespace_escape_before

665 + r'(\|_{0,2})'

666 + end_string_suffix),

667 email=re.compile(self.email_pattern % args + '$',

668 re.VERBOSE),

669 uri=re.compile(

670 (r"""

671 %(start_string_prefix)s

672 (?P<whole>

673 (?P<absolute> # absolute URI

674 (?P<scheme> # scheme (http, ftp, mailto)

675 [a-zA-Z][a-zA-Z0-9.+-]*

676 )

677 :

678 (

679 ( # either:

680 (//?)? # hierarchical URI

681 %(uric)s* # URI characters

682 %(uri_end)s # final URI char

683 )

684 ( # optional query

685 \?%(uric)s*

686 %(uri_end)s

687 )?

688 ( # optional fragment

689 \#%(uric)s*

690 %(uri_end)s

691 )?

692 )

693 )

694 | # *OR*

695 (?P<email> # email address

696 """ + self.email_pattern + r"""

697 )

698 )

699 %(end_string_suffix)s

700 """) % args, re.VERBOSE),

701 pep=re.compile(

702 r"""

703 %(start_string_prefix)s

704 (

705 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file

706 |

707 (PEP\s+(?P<pepnum2>\d+)) # reference by name

708 )

709 %(end_string_suffix)s""" % args, re.VERBOSE),

710 rfc=re.compile(

711 r"""

712 %(start_string_prefix)s

713 (RFC(-|\s+)?(?P<rfcnum>\d+))

714 %(end_string_suffix)s""" % args, re.VERBOSE))

715

716 self.implicit_dispatch.append((self.patterns.uri,

717 self.standalone_uri))

718 if settings.pep_references:

719 self.implicit_dispatch.append((self.patterns.pep,

720 self.pep_reference))

721 if settings.rfc_references:

722 self.implicit_dispatch.append((self.patterns.rfc,

723 self.rfc_reference))

724

725 def parse(self, text, lineno, memo, parent):

726 # Needs to be refactored for nested inline markup.

727 # Add nested_parse() method?

728 """

729 Return 2 lists: nodes (text and inline elements), and system_messages.

730

731 Using `self.patterns.initial`, a pattern which matches start-strings

732 (emphasis, strong, interpreted, phrase reference, literal,

733 substitution reference, and inline target) and complete constructs

734 (simple reference, footnote reference), search for a candidate. When

735 one is found, check for validity (e.g., not a quoted '*' character).

736 If valid, search for the corresponding end string if applicable, and

737 check it for validity. If not found or invalid, generate a warning

738 and ignore the start-string. Implicit inline markup (e.g. standalone

739 URIs) is found last.

740

741 :text: source string

742 :lineno: absolute line number, cf. `statemachine.get_source_and_line()`

743 """

744 self.document = memo.document

745 self.language = memo.language

746 self.reporter = self.document.reporter

747 self.parent = parent

748 pattern_search = self.patterns.initial.search

749 dispatch = self.dispatch

750 remaining = escape2null(text)

751 processed = []

752 unprocessed = []

753 messages = []

754 while remaining:

755 match = pattern_search(remaining)

756 if match:

757 groups = match.groupdict()

758 method = dispatch[groups['start'] or groups['backquote']

759 or groups['refend'] or groups['fnend']]

760 before, inlines, remaining, sysmessages = method(self, match,

761 lineno)

762 unprocessed.append(before)

763 messages += sysmessages

764 if inlines:

765 processed += self.implicit_inline(''.join(unprocessed),

766 lineno)

767 processed += inlines

768 unprocessed = []

769 else:

770 break

771 remaining = ''.join(unprocessed) + remaining

772 if remaining:

773 processed += self.implicit_inline(remaining, lineno)

774 return processed, messages

775

    # Inline object recognition
    # -------------------------
    # See also init_customizations().
    #
    # The fragments below are regular expression source strings. They are
    # combined into compiled patterns in `init_customizations()`.
    # A NUL character (\x00) in the patterns stands for an escaping
    # backslash (`parse()` passes the text through `escape2null()`).

    # Look-behind: not preceded by whitespace:
    non_whitespace_before = r'(?<!\s)'
    # Look-behind: not preceded by whitespace or NUL:
    non_whitespace_escape_before = r'(?<![\s\x00])'
    # Look-behind: not preceded by whitespace or NUL,
    # unless that character is itself preceded by NUL:
    non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
    # Lookahead: not followed by whitespace:
    non_whitespace_after = r'(?!\s)'
    # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
    simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
    # Valid URI characters (see RFC 2396 & RFC 2732);
    # final \x00 allows backslash escapes in URIs:
    uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
    # Delimiter indicating the end of a URI (not part of the URI):
    uri_end_delim = r"""[>]"""
    # Last URI character; same as uric but no punctuation:
    urilast = r"""[_~*/=+a-zA-Z0-9]"""
    # End of a URI (either 'urilast' or 'uric followed by a
    # uri_end_delim'):
    # (`locals()` is the class namespace here, i.e. the names defined above.)
    uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
    # Characters allowed in email addresses:
    emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
    # Template for email addresses; the placeholders are filled in and
    # the result is compiled with `re.VERBOSE` in `init_customizations()`:
    email_pattern = r"""
          %(emailc)s+(?:\.%(emailc)s+)*   # name
          (?<!\x00)@                      # at
          %(emailc)s+(?:\.%(emailc)s*)*   # host
          %(uri_end)s                     # final URI char
          """

802

803 def quoted_start(self, match):

804 """Test if inline markup start-string is 'quoted'.

805

806 'Quoted' in this context means the start-string is enclosed in a pair

807 of matching opening/closing delimiters (not necessarily quotes)

808 or at the end of the match.

809 """

810 string = match.string

811 start = match.start()

812 if start == 0: # start-string at beginning of text

813 return False

814 prestart = string[start - 1]

815 try:

816 poststart = string[match.end()]

817 except IndexError: # start-string at end of text

818 return True # not "quoted" but no markup start-string either

819 return punctuation_chars.match_chars(prestart, poststart)

820

821 def inline_obj(self, match, lineno, end_pattern, nodeclass,

822 restore_backslashes=False):

823 string = match.string

824 matchstart = match.start('start')

825 matchend = match.end('start')

826 if self.quoted_start(match):

827 return string[:matchend], [], string[matchend:], [], ''

828 endmatch = end_pattern.search(string[matchend:])

829 if endmatch and endmatch.start(1): # 1 or more chars

830 text = endmatch.string[:endmatch.start(1)]

831 if restore_backslashes:

832 text = unescape(text, True)

833 textend = matchend + endmatch.end(1)

834 rawsource = unescape(string[matchstart:textend], True)

835 node = nodeclass(rawsource, text)

836 return (string[:matchstart], [node],

837 string[textend:], [], endmatch.group(1))

838 msg = self.reporter.warning(

839 'Inline %s start-string without end-string.'

840 % nodeclass.__name__, line=lineno)

841 text = unescape(string[matchstart:matchend], True)

842 prb = self.problematic(text, text, msg)

843 return string[:matchstart], [prb], string[matchend:], [msg], ''

844

845 def problematic(self, text, rawsource, message):

846 msgid = self.document.set_id(message, self.parent)

847 problematic = nodes.problematic(rawsource, text, refid=msgid)

848 prbid = self.document.set_id(problematic)

849 message.add_backref(prbid)

850 return problematic

851

852 def emphasis(self, match, lineno):

853 before, inlines, remaining, sysmessages, endstring = self.inline_obj(

854 match, lineno, self.patterns.emphasis, nodes.emphasis)

855 return before, inlines, remaining, sysmessages

856

def strong(self, match, lineno):
    """Parse ``**strong**``; return (before, inlines, remaining, messages)."""
    result = self.inline_obj(
        match, lineno, self.patterns.strong, nodes.strong)
    # the fifth item (the end-string) is not needed by the caller
    before, inlines, remaining, sysmessages, _endstring = result
    return before, inlines, remaining, sysmessages

861

def interpreted_or_phrase_ref(self, match, lineno):
    """
    Parse backquoted text: interpreted text or a phrase reference.

    A role may be given as prefix (in `match`) or as suffix (in the
    end-string match), but not both.  Text whose raw source ends with
    ``_`` is passed to `phrase_ref()`, anything else to `interpreted()`.

    Return a 4-tuple: (text before, list of nodes, text after,
    list of system messages).
    """
    end_pattern = self.patterns.interpreted_or_phrase_ref
    source = match.string
    tick_start = match.start('backquote')
    tick_end = match.end('backquote')
    role_start = match.start('role')
    role = match.group('role')
    position = ''

    def flag(begin, end, message):
        # Wrap source[begin:end] in a "problematic" node tied to `message`.
        raw = unescape(source[begin:end], True)
        prb = self.problematic(raw, raw, message)
        return source[:begin], [prb], source[end:], [message]

    if role:
        role = role[1:-1]  # strip the surrounding colons
        position = 'prefix'
    elif self.quoted_start(match):
        return source[:tick_end], [], source[tick_end:], []

    found = end_pattern.search(source[tick_end:])
    if not found or not found.start(1):
        # no end-string, or empty content
        return flag(tick_start, tick_end, self.reporter.warning(
            'Inline interpreted text or phrase reference start-string '
            'without end-string.', line=lineno))

    text_end = tick_end + found.end()
    if found.group('role'):
        if role:
            return flag(role_start, text_end, self.reporter.warning(
                'Multiple roles in interpreted text (both '
                'prefix and suffix present; only one allowed).',
                line=lineno))
        role = found.group('suffix')[1:-1]
        position = 'suffix'
    escaped = found.string[:found.start(1)]
    rawsource = unescape(source[tick_start:text_end], True)
    if rawsource[-1:] != '_':
        # interpreted text; the raw source includes a prefix role
        rawsource = unescape(source[role_start:text_end], True)
        nodelist, messages = self.interpreted(rawsource, escaped, role,
                                              lineno)
        return source[:role_start], nodelist, source[text_end:], messages
    if role:
        # a reference must not carry a role
        return flag(role_start, text_end, self.reporter.warning(
            'Mismatch: both interpreted text role %s and '
            'reference suffix.' % position, line=lineno))
    return self.phrase_ref(source[:tick_start], source[text_end:],
                           rawsource, escaped)

913

def phrase_ref(self, before, after, rawsource, escaped, text=None):
    """
    Build the nodes for a phrase reference, with optional embedded target.

    `escaped` is the reference text (null-escaped); it may end with an
    embedded ``<URI>`` or ``<alias_>``.  A `rawsource` ending with ``__``
    marks an anonymous reference.  The `text` argument is ignored.

    Return a 4-tuple: (`before`, list of nodes, `after`, empty list).
    """
    # `text` is ignored (since 0.16)
    embedded = self.patterns.embedded_link.search(escaped)
    if embedded:  # embedded <URI> or <alias_>
        text = escaped[:embedded.start(0)]
        unescaped = unescape(text)
        rawtext = unescape(text, True)
        aliastext = embedded.group(2)
        rawaliastext = unescape(aliastext, True)
        underscore_escaped = rawaliastext.endswith(r'\_')
        if (aliastext.endswith('_')
            and not (underscore_escaped
                     or self.patterns.uri.match(aliastext))):
            # <alias_>: indirect reference to a named target
            aliastype = 'name'
            alias = normalize_name(unescape(aliastext[:-1]))
            target = nodes.target(embedded.group(1), refname=alias)
        else:
            aliastype = 'uri'
            # remove unescaped whitespace
            alias_parts = split_escaped_whitespace(embedded.group(2))
            alias = ' '.join(''.join(part.split())
                             for part in alias_parts)
            alias = self.adjust_uri(unescape(alias))
            if alias.endswith(r'\_'):
                alias = alias[:-2] + '_'
            target = nodes.target(embedded.group(1), refuri=alias)
            # NOTE(review): nesting reconstructed from a flattened listing;
            # confirm that only URI targets are marked as referenced.
            target.referenced = 1
        if not aliastext:
            raise ApplicationError('problem with embedded link: %r'
                                   % aliastext)
        if not text:
            # no reference text before the embedded link: show the alias
            text = alias
            unescaped = unescape(text)
            rawtext = rawaliastext
    else:
        text = escaped
        unescaped = unescape(text)
        target = None
        rawtext = unescape(escaped, True)

    refname = normalize_name(unescaped)
    reference = nodes.reference(rawsource, text)
    reference[0].rawsource = rawtext

    node_list = [reference]

    if rawsource[-2:] == '__':
        # anonymous reference
        if target and (aliastype == 'name'):
            reference['refname'] = alias
            self.document.note_refname(reference)
            # self.document.note_indirect_target(target) # required?
        elif target and (aliastype == 'uri'):
            reference['refuri'] = alias
        else:
            reference['anonymous'] = True
    else:
        # named reference
        if target:
            target['names'].append(refname)
            if aliastype == 'name':
                reference['refname'] = alias
                self.document.note_indirect_target(target)
                self.document.note_refname(reference)
            else:
                reference['refuri'] = alias
                # target.note_referenced_by(name=refname)
                self.document.note_implicit_target(target, self.parent)
            node_list.append(target)
        else:
            reference['refname'] = refname
            self.document.note_refname(reference)
    return before, node_list, after, []

985

def adjust_uri(self, uri):
    """Return `uri`, prefixed with ``mailto:`` if it matches the email pattern."""
    is_email = self.patterns.email.match(uri)
    return 'mailto:' + uri if is_email else uri

992

def interpreted(self, rawsource, text, role, lineno):
    """
    Apply the interpreted text role `role` to `text`.

    Return a tuple (list of nodes, list of system messages).  If no role
    function is found, the node list holds a single "problematic" node
    and an error message is appended to the messages.
    """
    role_fn, messages = roles.role(role, self.language, lineno,
                                   self.reporter)
    if not role_fn:
        msg = self.reporter.error(
            'Unknown interpreted text role "%s".' % role,
            line=lineno)
        return ([self.problematic(rawsource, rawsource, msg)],
                messages + [msg])
    role_nodes, role_messages = role_fn(role, rawsource, text, lineno, self)
    return role_nodes, messages + role_messages

1005

def literal(self, match, lineno):
    """Parse an inline literal; return (before, inlines, remaining, messages)."""
    result = self.inline_obj(
        match, lineno, self.patterns.literal, nodes.literal,
        restore_backslashes=True)
    # the fifth item (the end-string) is not needed by the caller
    before, inlines, remaining, sysmessages, _endstring = result
    return before, inlines, remaining, sysmessages

1011

def inline_internal_target(self, match, lineno):
    """
    Parse an inline internal target and register it with the document.

    Return (before, inlines, remaining, system messages).
    """
    before, inlines, remaining, sysmessages, _endstring = self.inline_obj(
        match, lineno, self.patterns.target, nodes.target)
    if not (inlines and isinstance(inlines[0], nodes.target)):
        # parsing failed; nothing to register
        return before, inlines, remaining, sysmessages
    assert len(inlines) == 1
    target = inlines[0]
    target['names'].append(normalize_name(target.astext()))
    self.document.note_explicit_target(target, self.parent)
    return before, inlines, remaining, sysmessages

1022

def substitution_reference(self, match, lineno):
    """
    Parse a substitution reference (``|name|``, ``|name|_``, ``|name|__``).

    If the end-string ends with ``_``, the substitution reference is
    wrapped in a `nodes.reference` (anonymous for ``__``).

    Return (before, inlines, remaining, system messages).
    """
    before, inlines, remaining, sysmessages, endstring = self.inline_obj(
        match, lineno, self.patterns.substitution_ref,
        nodes.substitution_reference)
    if len(inlines) != 1:
        return before, inlines, remaining, sysmessages
    subref_node = inlines[0]
    if not isinstance(subref_node, nodes.substitution_reference):
        return before, inlines, remaining, sysmessages

    subref_text = subref_node.astext()
    self.document.note_substitution_ref(subref_node, subref_text)
    if endstring[-1:] != '_':
        # plain substitution reference
        return before, inlines, remaining, sysmessages

    # substitution reference that is also a hyperlink reference
    reference_node = nodes.reference(
        '|%s%s' % (subref_text, endstring), '')
    if endstring[-2:] == '__':
        reference_node['anonymous'] = True
    else:
        reference_node['refname'] = normalize_name(subref_text)
        self.document.note_refname(reference_node)
    reference_node += subref_node
    return before, [reference_node], remaining, sysmessages

1043

def footnote_reference(self, match, lineno):
    """
    Handles `nodes.footnote_reference` and `nodes.citation_reference`
    elements.

    Return a 4-tuple: (text before, [reference node], text after, []).
    """
    label = match.group('footnotelabel')
    refname = normalize_name(label)
    source = match.string
    before = source[:match.start('whole')]
    remaining = source[match.end('whole'):]
    rawsource = '[%s]_' % label
    if match.group('citationlabel'):
        refnode = nodes.citation_reference(rawsource, refname=refname)
        refnode += nodes.Text(label)
        self.document.note_citation_ref(refnode)
    else:
        refnode = nodes.footnote_reference(rawsource)
        if refname[0] == '#':
            # auto-numbered footnote, optionally labelled
            refname = refname[1:]
            refnode['auto'] = 1
            self.document.note_autofootnote_ref(refnode)
        elif refname == '*':
            # auto-symbol footnote
            refname = ''
            refnode['auto'] = '*'
            self.document.note_symbol_footnote_ref(refnode)
        else:
            # manually numbered footnote
            refnode += nodes.Text(label)
        if refname:
            refnode['refname'] = refname
            self.document.note_footnote_ref(refnode)
        # NOTE(review): nesting reconstructed from a flattened listing;
        # confirm that trimming applies to footnote references only.
        if utils.get_trim_footnote_ref_space(self.document.settings):
            before = before.rstrip()
    return before, [refnode], remaining, []

1078

def reference(self, match, lineno, anonymous=False):
    """
    Parse a simple (single-word) hyperlink reference.

    With `anonymous` true the node is flagged ``anonymous``; otherwise its
    ``refname`` is set and the node is registered with the document.

    Return a 4-tuple: (text before, [reference node], text after, []).
    """
    name = match.group('refname')
    refname = normalize_name(name)
    node = nodes.reference(name + match.group('refend'), name)
    node[0].rawsource = name
    if anonymous:
        node['anonymous'] = True
    else:
        node['refname'] = refname
        self.document.note_refname(node)
    source = match.string
    whole_start = match.start('whole')
    whole_end = match.end('whole')
    return source[:whole_start], [node], source[whole_end:], []

1094

def anonymous_reference(self, match, lineno):
    """Parse an anonymous reference: `reference()` with ``anonymous=True``."""
    result = self.reference(match, lineno, anonymous=True)
    return result

1097

def standalone_uri(self, match, lineno):
    """
    Return a one-item list with a `nodes.reference` for a standalone URI.

    Email addresses get a ``mailto:`` scheme in the ``refuri``.
    Raise `MarkupMismatch` if the scheme is not in `urischemes.schemes`.
    """
    scheme = match.group('scheme')
    if scheme and scheme.lower() not in urischemes.schemes:
        raise MarkupMismatch  # not a valid scheme
    addscheme = 'mailto:' if match.group('email') else ''
    text = match.group('whole')
    refuri = addscheme + unescape(text)
    reference = nodes.reference(unescape(text, True), text,
                                refuri=refuri)
    return [reference]

1112

def pep_reference(self, match, lineno):
    """
    Return a one-item list with a `nodes.reference` to a PEP.

    The PEP number is read from group ``pepnum1`` (text starting with
    "pep-") or ``pepnum2`` (text starting with "PEP").
    Raise `MarkupMismatch` for any other text.
    """
    text = match.group(0)
    if text.startswith('pep-'):
        number_group = 'pepnum1'
    elif text.startswith('PEP'):
        number_group = 'pepnum2'
    else:
        raise MarkupMismatch
    pepnum = int(unescape(match.group(number_group)))
    ref = (self.document.settings.pep_base_url
           + self.document.settings.pep_file_url_template % pepnum)
    return [nodes.reference(unescape(text, True), text, refuri=ref)]

1124

# Template for the RFC file name, appended to the `rfc_base_url` setting.
rfc_url = 'rfc%d.html'

def rfc_reference(self, match, lineno):
    """
    Return a one-item list with a `nodes.reference` to an RFC.

    Raise `MarkupMismatch` if the matched text does not start with "RFC".
    """
    text = match.group(0)
    if not text.startswith('RFC'):
        raise MarkupMismatch
    rfcnum = int(unescape(match.group('rfcnum')))
    ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
    return [nodes.reference(unescape(text, True), text, refuri=ref)]

1135

def implicit_inline(self, text, lineno):
    """
    Check each of the patterns in `self.implicit_dispatch` for a match,
    and dispatch to the stored method for the pattern.  Recursively check
    the text before and after the match.  Return a list of `nodes.Text`
    and inline element nodes.
    """
    if not text:
        return []
    for pattern, method in self.implicit_dispatch:
        found = pattern.search(text)
        if not found:
            continue
        try:
            # Must recurse on strings before *and* after the match;
            # there may be multiple patterns.
            head = self.implicit_inline(text[:found.start()], lineno)
            middle = method(found, lineno)
            tail = self.implicit_inline(text[found.end():], lineno)
        except MarkupMismatch:
            # the method rejected the match; try the next pattern
            continue
        return head + middle + tail
    return [nodes.Text(text)]

1157

# Maps an inline markup start-string to the method that parses it.
dispatch = {
    '*': emphasis,
    '**': strong,
    '`': interpreted_or_phrase_ref,
    '``': literal,
    '_`': inline_internal_target,
    ']_': footnote_reference,
    '|': substitution_reference,
    '_': reference,
    '__': anonymous_reference,
}

1167

1168

1169def _loweralpha_to_int(s, _zero=(ord('a')-1)):

1170 return ord(s) - _zero

1171

1172

1173def _upperalpha_to_int(s, _zero=(ord('A')-1)):

1174 return ord(s) - _zero

1175

1176

1177class Body(RSTState):

1178

1179 """

1180 Generic classifier of the first line of a block.

1181 """

1182

double_width_pad_char = tableparser.TableParser.double_width_pad_char
"""Padding character for East Asian double-width text."""

enum = Struct()
"""Enumerated list parsing information."""

# Per format: the enumerator's prefix and suffix, and the slice bounds
# (`start`, `end`) that cut the bare enumerator text out of the marker.
enum.formatinfo = {
    'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
    'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
    'period': Struct(prefix='', suffix='.', start=0, end=-1),
}
enum.formats = enum.formatinfo.keys()
enum.sequences = [
    'arabic', 'loweralpha', 'upperalpha',
    'lowerroman', 'upperroman',
]  # ORDERED!
# Regular expression fragment for each sequence.
enum.sequencepats = {
    'arabic': '[0-9]+',
    'loweralpha': '[a-z]',
    'upperalpha': '[A-Z]',
    'lowerroman': '[ivxlcdm]+',
    'upperroman': '[IVXLCDM]+',
}
# Function converting the enumerator text of each sequence to a number.
enum.converters = {
    'arabic': int,
    'loweralpha': _loweralpha_to_int,
    'upperalpha': _upperalpha_to_int,
    'lowerroman': RomanNumeral.from_string,
    'upperroman': RomanNumeral.from_string,
}

# Compiled pattern per sequence, anchored at the end of the text.
# (Plain loop, not a comprehension: this runs at class level, where a
# comprehension body cannot see `enum`.)
enum.sequenceregexps = {}
for sequence in enum.sequences:
    enum.sequenceregexps[sequence] = re.compile(
        enum.sequencepats[sequence] + '$')

grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
"""Matches the top (& bottom) of a full table)."""

simple_table_top_pat = re.compile('=+( +=+)+ *$')
"""Matches the top of a simple table."""

simple_table_border_pat = re.compile('=+[ =]*$')
"""Matches the bottom & header bottom of a simple table."""

1220

pats = {
    'nonalphanum7bit': '[!-/:-@[-`{-~]',
    'alpha': '[a-zA-Z]',
    'alphanum': '[a-zA-Z0-9]',
    'alphanumplus': '[a-zA-Z0-9_-]',
    'enum': ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
             '|%(upperroman)s|#)' % enum.sequencepats),
}
"""Fragments of patterns used by transitions."""

# The following fragments are built from the ones defined above,
# so the order of these assignments matters.
pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
# @@@ Loosen up the pattern?  Allow Unicode?
pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

# One named group per enumerator format: prefix + enumerator + suffix.
for format in enum.formats:
    pats[format] = '(?P<%s>%s%s%s)' % (
        format, re.escape(enum.formatinfo[format].prefix),
        pats['enum'], re.escape(enum.formatinfo[format].suffix))

# Transition name -> pattern (source string or compiled pattern).
patterns = {
    'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
    'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
    'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
    'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
    'doctest': r'>>>( +|$)',
    'line_block': r'\|( +|$)',
    'grid_table_top': grid_table_top_pat,
    'simple_table_top': simple_table_top_pat,
    'explicit_markup': r'\.\.( +|$)',
    'anonymous': r'__( +|$)',
    'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
    'text': r'',
}
initial_transitions = (
    'bullet',
    'enumerator',
    'field_marker',
    'option_marker',
    'doctest',
    'line_block',
    'grid_table_top',
    'simple_table_top',
    'explicit_markup',
    'anonymous',
    'line',
    'text',
)

1268

def indent(self, match, context, next_state):
    """Block quote."""
    fetched = self.state_machine.get_indented()
    lines, _indent, line_offset, blank_finish = fetched
    self.parent += self.block_quote(lines, line_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Block quote')
    return context, next_state, []

1278

def block_quote(self, indented, line_offset):
    """
    Parse `indented` lines into one or more block quotes.

    A new block quote is started after each attribution.  Return a list
    of `nodes.block_quote` elements and system messages generated while
    parsing attributions.
    """
    elements = []
    while indented:
        quote = nodes.block_quote(rawsource='\n'.join(indented))
        quote.source, quote.line = (
            self.state_machine.get_source_and_line(line_offset+1))
        split = self.split_attribution(indented, line_offset)
        (quote_lines, attribution_lines, attribution_offset,
         indented, new_line_offset) = split
        self.nested_parse(quote_lines, line_offset, quote)
        elements.append(quote)
        if attribution_lines:
            attribution, messages = self.parse_attribution(
                attribution_lines, line_offset+attribution_offset)
            quote += attribution
            elements += messages
            line_offset = new_line_offset
            # skip blank lines before the next block quote
            while indented and not indented[0]:
                indented = indented[1:]
                line_offset += 1
    return elements

1302

# Start of an attribution: "--" or "---" (but no longer run of hyphens)
# or an em-dash (U+2014), optional spaces, then a non-blank character.
attribution_pattern = re.compile(
    '(---?(?!-)|\u2014) *(?=[^ \\n])')

1305

def split_attribution(self, indented, line_offset):
    """
    Check for a block quote attribution and split it off:

    * First line after a blank line must begin with a dash ("--", "---",
      em-dash; matches `self.attribution_pattern`).
    * Every line after that must have consistent indentation.
    * Attributions must be preceded by block quote content.

    Return a tuple of: (block quote content lines, attribution lines,
    attribution offset, remaining indented lines, remaining lines offset).
    The last four items are ``None`` if there is no attribution.
    """
    last_blank = None
    content_seen = False
    for index in range(len(indented)):
        stripped = indented[index].rstrip()
        if not stripped:
            last_blank = index
            continue
        if content_seen and last_blank == index - 1:  # last line blank
            found = self.attribution_pattern.match(stripped)
            if found:
                attribution_end, indent = self.check_attribution(
                    indented, index)
                if attribution_end:
                    a_lines = indented[index:attribution_end]
                    # strip the dash from the first line and the
                    # common indentation from the following lines
                    a_lines.trim_left(found.end(), end=1)
                    a_lines.trim_left(indent, start=1)
                    return (indented[:index], a_lines,
                            index, indented[attribution_end:],
                            line_offset + attribution_end)
        content_seen = True
    return indented, None, None, None, None

1340

def check_attribution(self, indented, attribution_start):
    """
    Check attribution shape.

    Lines after the first must share one indentation; the attribution
    ends at the first blank line or at the end of `indented`.

    Return the index past the end of the attribution, and the indent,
    or ``(None, None)`` if the indentation is inconsistent.
    """
    common_indent = None
    index = attribution_start + 1
    for index in range(attribution_start + 1, len(indented)):
        stripped = indented[index].rstrip()
        if not stripped:
            # blank line: the attribution ends here
            return index, (common_indent or 0)
        line_indent = len(stripped) - len(stripped.lstrip())
        if common_indent is None:
            common_indent = line_indent
        elif line_indent != common_indent:
            return None, None  # bad shape; not an attribution
    # no blank line found: return index of line after last one examined
    return index + 1, (common_indent or 0)

1360

def parse_attribution(self, indented, line_offset):
    """
    Parse the attribution lines `indented` as inline text.

    Return a tuple (`nodes.attribution` element, list of system messages).
    """
    lineno = 1 + line_offset  # line_offset is zero-based
    text = '\n'.join(indented).rstrip()
    textnodes, messages = self.inline_text(text, lineno)
    attribution = nodes.attribution(text, '', *textnodes)
    attribution.source, attribution.line = (
        self.state_machine.get_source_and_line(lineno))
    return attribution, messages

1368

def bullet(self, match, context, next_state):
    """Bullet list item."""
    bullet_list = nodes.bullet_list()
    bullet_list.source, bullet_list.line = (
        self.state_machine.get_source_and_line())
    self.parent += bullet_list
    bullet_list['bullet'] = match.string[0]
    first_item, blank_finish = self.list_item(match.end())
    bullet_list += first_item
    # parse the remaining items with the specialized "BulletList" state
    following = self.state_machine.line_offset + 1  # next line
    new_line_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[following:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=bullet_list, initial_state='BulletList',
        blank_finish=blank_finish)
    self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Bullet list')
    return [], next_state, []

1387

def list_item(self, indent):
    """
    Parse the list item at the current line, whose text starts at `indent`.

    Return a tuple (`nodes.list_item` element, blank_finish flag).
    """
    src, srcline = self.state_machine.get_source_and_line()
    machine = self.state_machine
    if machine.line[indent:]:
        # text on the marker line fixes the indentation
        indented, line_offset, blank_finish = (
            machine.get_known_indented(indent))
    else:
        # empty first line: indentation comes from the following lines
        indented, indent, line_offset, blank_finish = (
            machine.get_first_known_indented(indent))
    item = nodes.list_item('\n'.join(indented))
    item.source, item.line = src, srcline
    if indented:
        self.nested_parse(indented, input_offset=line_offset,
                          node=item)
    return item, blank_finish

1402

def enumerator(self, match, context, next_state):
    """Enumerated List Item"""
    fmt, sequence, text, ordinal = self.parse_enumerator(match)
    if not self.is_enumerated_list_item(ordinal, sequence, fmt):
        # not a list after all; re-run the line as ordinary text
        raise statemachine.TransitionCorrection('text')
    enumlist = nodes.enumerated_list()
    enumlist.source, enumlist.line = (
        self.state_machine.get_source_and_line())
    self.parent += enumlist
    # auto-enumerated lists ("#") are numbered with arabic numerals
    enumlist['enumtype'] = 'arabic' if sequence == '#' else sequence
    formatinfo = self.enum.formatinfo[fmt]
    enumlist['prefix'] = formatinfo.prefix
    enumlist['suffix'] = formatinfo.suffix
    if ordinal != 1:
        enumlist['start'] = ordinal
        self.parent += self.reporter.info(
            'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
            % (text, ordinal), base_node=enumlist)
    listitem, blank_finish = self.list_item(match.end())
    enumlist += listitem
    # parse the remaining items with the specialized "EnumeratedList" state
    following = self.state_machine.line_offset + 1  # next line
    newline_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[following:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=enumlist, initial_state='EnumeratedList',
        blank_finish=blank_finish,
        extra_settings={'lastordinal': ordinal,
                        'format': fmt,
                        'auto': sequence == '#'})
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Enumerated list')
    return [], next_state, []

1439

def parse_enumerator(self, match, expected_sequence=None):
    """
    Analyze an enumerator and return the results.

    :Return:
        - the enumerator format ('period', 'parens', or 'rparen'),
        - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
        - the text of the enumerator, stripped of formatting, and
        - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
          ``None`` is returned for invalid enumerator text).

    :Exception: `ParserError` if no format or sequence matches, or if
        `expected_sequence` is not a known sequence name.

    The enumerator format has already been determined by the regular
    expression match. If `expected_sequence` is given, that sequence is
    tried first. If not, we check for Roman numeral 1. This way,
    single-character Roman numerals (which are also alphabetical) can be
    matched. If no sequence has been matched, all sequences are checked in
    order.
    """
    groupdict = match.groupdict()
    sequence = ''
    for fmt in self.enum.formats:
        if groupdict[fmt]:  # was this the format matched?
            break  # yes; keep `fmt`
    else:  # shouldn't happen
        raise ParserError('enumerator format not matched')
    formatinfo = self.enum.formatinfo[fmt]
    # cut prefix and suffix off the matched marker
    text = groupdict[fmt][formatinfo.start:formatinfo.end]
    if text == '#':
        sequence = '#'
    elif expected_sequence:
        try:
            if self.enum.sequenceregexps[expected_sequence].match(text):
                sequence = expected_sequence
        except KeyError:  # shouldn't happen
            # Name the sequence that was looked up; `sequence` is still
            # empty at this point.
            raise ParserError('unknown enumerator sequence: %s'
                              % expected_sequence)
    elif text == 'i':
        sequence = 'lowerroman'
    elif text == 'I':
        sequence = 'upperroman'
    if not sequence:
        # fall back to the first sequence (in order) matching the text
        for sequence in self.enum.sequences:
            if self.enum.sequenceregexps[sequence].match(text):
                break
        else:  # shouldn't happen
            raise ParserError('enumerator sequence not matched')
    if sequence == '#':
        ordinal = 1
    else:
        try:
            ordinal = int(self.enum.converters[sequence](text))
        except InvalidRomanNumeralError:
            ordinal = None
    return fmt, sequence, text, ordinal

1494

def is_enumerated_list_item(self, ordinal, sequence, format):
    """
    Check validity based on the ordinal value and the second line.

    Return true (1) if the ordinal is valid and the second line is
    missing, blank, indented, or starts with the next enumerator or an
    auto-enumerator.  Return ``None`` otherwise.
    """
    if ordinal is None:
        return None
    try:
        following = self.state_machine.next_line()
    except EOFError:  # end of input lines
        self.state_machine.previous_line()
        return 1
    # peeked only: step back to the enumerator line
    self.state_machine.previous_line()
    if not following[:1].strip():  # blank or indented
        return 1
    markers = self.make_enumerator(ordinal + 1, sequence, format)
    if markers:
        next_enumerator, auto_enumerator = markers
        try:
            if following.startswith((next_enumerator, auto_enumerator)):
                return 1
        except TypeError:
            pass
    return None

1522

def make_enumerator(self, ordinal, sequence, format):
    """
    Construct and return the next enumerated list item marker, and an
    auto-enumerator ("#" instead of the regular enumerator).

    Both markers include the format's prefix and suffix and a trailing
    space.  Return ``None`` for invalid (out of range) ordinals.
    """
    if sequence == '#':
        marker = '#'
    elif sequence == 'arabic':
        marker = str(ordinal)
    else:
        if sequence.endswith('alpha'):
            if ordinal > 26:
                return None
            marker = chr(ordinal + ord('a') - 1)
        elif sequence.endswith('roman'):
            try:
                marker = RomanNumeral(ordinal).to_uppercase()
            except TypeError:
                return None
        else:  # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
        # apply the case given by the sequence name
        if sequence.startswith('lower'):
            marker = marker.lower()
        elif sequence.startswith('upper'):
            marker = marker.upper()
        else:  # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
    info = self.enum.formatinfo[format]
    next_enumerator = (info.prefix + marker + info.suffix
                       + ' ')
    auto_enumerator = info.prefix + '#' + info.suffix + ' '
    return next_enumerator, auto_enumerator

1559

def field_marker(self, match, context, next_state):
    """Field list item."""
    fields = nodes.field_list()
    self.parent += fields
    first_field, blank_finish = self.field(match)
    fields += first_field
    # parse the remaining fields with the specialized "FieldList" state
    following = self.state_machine.line_offset + 1  # next line
    newline_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[following:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=fields, initial_state='FieldList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Field list')
    return [], next_state, []

1576

def field(self, match):
    """
    Parse the field starting at the current line.

    System messages from parsing the field name are placed at the start
    of the field body.

    Return a tuple (`nodes.field` element, blank_finish flag).
    """
    name = self.parse_field_marker(match)
    src, srcline = self.state_machine.get_source_and_line()
    lineno = self.state_machine.abs_line_number()
    fetched = self.state_machine.get_first_known_indented(match.end())
    indented, _indent, line_offset, blank_finish = fetched
    field_node = nodes.field()
    field_node.source = src
    field_node.line = srcline
    name_nodes, name_messages = self.inline_text(name, lineno)
    field_node += nodes.field_name(name, '', *name_nodes)
    body = nodes.field_body('\n'.join(indented), *name_messages)
    field_node += body
    if indented:
        self.parse_field_body(indented, line_offset, body)
    return field_node, blank_finish

1593

def parse_field_marker(self, match):
    """Extract & return field name from a field marker match."""
    without_lead = match.group()[1:]  # drop the leading ':'
    # drop the last ':' and everything after it
    name_end = without_lead.rfind(':')
    return without_lead[:name_end]

1599

def parse_field_body(self, indented, offset, node) -> None:
    """Parse the lines `indented` (starting at `offset`) into `node`."""
    options = {'input_offset': offset, 'node': node}
    self.nested_parse(indented, **options)

1602

def option_marker(self, match, context, next_state):
    """Option list item."""
    option_list = nodes.option_list()
    option_list.source, option_list.line = (
        self.state_machine.get_source_and_line())
    try:
        first_item, blank_finish = self.option_list_item(match)
    except MarkupError as error:
        # This shouldn't happen; pattern won't match.
        self.parent += self.reporter.error(
            'Invalid option list marker: %s' % error)
        # fall back: parse the indented text as a block quote
        fetched = self.state_machine.get_first_known_indented(match.end())
        indented, _indent, line_offset, blank_finish = fetched
        self.parent += self.block_quote(indented, line_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
    self.parent += option_list
    option_list += first_item
    # parse the remaining items with the specialized "OptionList" state
    following = self.state_machine.line_offset + 1  # next line
    newline_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[following:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=option_list, initial_state='OptionList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Option list')
    return [], next_state, []

1634

def option_list_item(self, match):
    """
    Parse the option list item starting at the current line.

    Return a tuple (`nodes.option_list_item` element, blank_finish flag).
    Raise `statemachine.TransitionCorrection` ('text') if there is no
    description text.
    """
    start_offset = self.state_machine.abs_line_offset()
    options = self.parse_option_marker(match)
    fetched = self.state_machine.get_first_known_indented(match.end())
    indented, _indent, line_offset, blank_finish = fetched
    if not indented:  # not an option list item
        # rewind to where we started
        self.goto_line(start_offset)
        raise statemachine.TransitionCorrection('text')
    group = nodes.option_group('', *options)
    description = nodes.description('\n'.join(indented))
    item = nodes.option_list_item('', group,
                                  description)
    if indented:
        self.nested_parse(indented, input_offset=line_offset,
                          node=description)
    return item, blank_finish

1651

def parse_option_marker(self, match):
    """
    Return a list of `node.option` and `node.option_argument` objects,
    parsed from an option marker match.

    :Exception: `MarkupError` for invalid option markers.
    """
    parsed = []
    # split at ", ", except inside < > (complex arguments)
    for optionstring in re.split(r', (?![^<]*>)', match.group().rstrip()):
        tokens = optionstring.split()
        delimiter = ' '
        head = tokens[0]
        head_parts = head.split('=', 1)
        if len(head_parts) > 1:
            # "--opt=value" form
            tokens[:1] = head_parts
            delimiter = '='
        elif (len(head) > 2
              and ((head.startswith('-')
                    and not head.startswith('--'))
                   or head.startswith('+'))):
            # "-ovalue" form
            tokens[:1] = [head[:2], head[2:]]
            delimiter = ''
        if len(tokens) > 1 and (tokens[1].startswith('<')
                                and tokens[-1].endswith('>')):
            # "-o <value1 value2>" form; join all values into one token
            tokens[1:] = [' '.join(tokens[1:])]
        if not 0 < len(tokens) <= 2:
            raise MarkupError(
                'wrong number of option tokens (=%s), should be 1 or 2: '
                '"%s"' % (len(tokens), optionstring))
        option = nodes.option(optionstring)
        option += nodes.option_string(tokens[0], tokens[0])
        if len(tokens) > 1:
            option += nodes.option_argument(tokens[1], tokens[1],
                                            delimiter=delimiter)
        parsed.append(option)
    return parsed

1693

def doctest(self, match, context, next_state):
    """Doctest block: add the text block as a `nodes.doctest_block`."""
    # read the line number before fetching the text block
    start_line = self.document.current_line
    text = '\n'.join(self.state_machine.get_text_block())
    # TODO: Parse with `directives.body.CodeBlock` with
    # argument 'pycon' (Python Console) in Docutils 1.0.
    block = nodes.doctest_block(text, text)
    block.line = start_line
    self.parent += block
    return [], next_state, []

1703

def line_block(self, match, context, next_state):
    """
    First line of a line block.

    Creates the `line_block` node, parses the remaining lines in the
    'LineBlock' state and finally nests the lines by indentation.
    """
    machine = self.state_machine
    block = nodes.line_block()
    self.parent += block
    lineno = machine.abs_line_number()
    block.source, block.line = machine.get_source_and_line(lineno)
    first_line, messages, blank_finish = self.line_block_line(match, lineno)
    block += first_line
    self.parent += messages
    if not blank_finish:
        following = machine.line_offset + 1  # next line
        new_line_offset, blank_finish = self.nested_list_parse(
            machine.input_lines[following:],
            input_offset=machine.abs_line_offset() + 1,
            node=block, initial_state='LineBlock',
            blank_finish=False)
        self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.reporter.warning(
            'Line block ends without a blank line.',
            line=lineno+1)
    if len(block):
        # The first line defines the base indentation if it has none.
        if block[0].indent is None:
            block[0].indent = 0
        self.nest_line_block_lines(block)
    return [], next_state, []

1731

def line_block_line(self, match, lineno):
    """
    Return one `line` element of a line_block.

    :Return: a 3-tuple of the `line` node, the system messages from
             inline parsing, and the "blank finish" flag.
    """
    machine = self.state_machine
    indented, indent, line_offset, blank_finish = (
        machine.get_first_known_indented(match.end(), until_blank=True))
    text = '\n'.join(indented)
    children, messages = self.inline_text(text, lineno)
    line = nodes.line(text, '', *children)
    line.source, line.line = machine.get_source_and_line(lineno)
    is_empty = match.string.rstrip() == '|'
    if not is_empty:
        line.indent = len(match.group(1)) - 1
    return line, messages, blank_finish

1745

def nest_line_block_lines(self, block) -> None:
    """
    Give lines without an own indent the indent of their predecessor,
    then nest the lines of `block` according to indentation.
    """
    for position, item in enumerate(block[1:], start=1):
        if item.indent is None:
            item.indent = block[position - 1].indent
    self.nest_line_block_segment(block)

1751

def nest_line_block_segment(self, block) -> None:
    """
    Recursively regroup the items of `block` in place.

    Consecutive items indented deeper than the least-indented item are
    collected in a new nested `line_block`, which is itself processed
    recursively.
    """
    least = min([item.indent for item in block])
    regrouped = []
    pending = nodes.line_block()
    for item in block:
        if item.indent > least:
            pending.append(item)
            continue
        if len(pending):
            # a run of deeper lines ends here
            self.nest_line_block_segment(pending)
            regrouped.append(pending)
            pending = nodes.line_block()
        regrouped.append(item)
    if len(pending):
        self.nest_line_block_segment(pending)
        regrouped.append(pending)
    block[:] = regrouped

1770

def grid_table_top(self, match, context, next_state):
    """Top border of a full table."""
    isolate = self.isolate_grid_table
    parser_class = tableparser.GridTableParser
    return self.table_top(match, context, next_state,
                          isolate, parser_class)

1776

def simple_table_top(self, match, context, next_state):
    """Top border of a simple table."""
    isolate = self.isolate_simple_table
    parser_class = tableparser.SimpleTableParser
    return self.table_top(match, context, next_state,
                          isolate, parser_class)

1782

def table_top(self, match, context, next_state,
              isolate_function, parser_class):
    """
    Top border of a generic table.

    Parses the table, appends the resulting nodes to the parent and
    adds a warning if the table is not followed by a blank line.
    """
    nodelist, blank_finish = self.table(isolate_function, parser_class)
    self.parent += nodelist
    if blank_finish:
        return [], next_state, []
    next_lineno = self.state_machine.abs_line_number() + 1
    self.parent += self.reporter.warning(
        'Blank line required after table.', line=next_lineno)
    return [], next_state, []

1794

def table(self, isolate_function, parser_class):
    """
    Parse a table.

    `isolate_function` returns the table's text block, a list of system
    messages and the "blank finish" flag; `parser_class` is instantiated
    to parse a non-empty block.

    :Return: 2-tuple of a node list and the "blank finish" flag.
    """
    block, messages, blank_finish = isolate_function()
    if not block:
        # nothing to parse, only the messages are reported
        return messages, blank_finish
    try:
        tabledata = parser_class().parse(block)
        first_line = (self.state_machine.abs_line_number()
                      - len(block) + 1)
        nodelist = [self.build_table(tabledata, first_line)] + messages
    except tableparser.TableMarkupError as err:
        nodelist = self.malformed_table(block, ' '.join(err.args),
                                        offset=err.offset) + messages
    return nodelist, blank_finish

1812

def isolate_grid_table(self):
    """
    Extract the text block of a grid table from the input.

    Lines are stripped and checked for left edge, bottom border and
    right edge.

    :Return: 3-tuple of the table block (empty list if malformed),
             a list of system messages, and the "blank finish" flag.
    """
    machine = self.state_machine
    is_border = self.grid_table_top_pat.match
    messages = []
    blank_finish = True
    try:
        block = machine.get_text_block(flush_left=True)
    except statemachine.UnexpectedIndentationError as err:
        block, src, srcline = err.args
        messages.append(self.reporter.error('Unexpected indentation.',
                                            source=src, line=srcline))
        blank_finish = False
    block.disconnect()
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    width = len(block[0].strip())
    # NOTE: `index` is deliberately shared by the loops below; the
    # "bottom border" error reports whatever value it holds last.
    for index in range(len(block)):
        block[index] = block[index].strip()
        if block[index][0] not in '+|':  # check left edge
            blank_finish = False
            machine.previous_line(len(block) - index)
            del block[index:]
            break
    if not is_border(block[-1]):  # find bottom
        # from second-last to third line of table:
        for index in range(len(block) - 2, 1, -1):
            if is_border(block[index]):
                machine.previous_line(len(block) - index + 1)
                del block[index+1:]
                blank_finish = False
                break
        else:
            detail = 'Bottom border missing or corrupt.'
            messages.extend(self.malformed_table(block, detail, index))
            return [], messages, blank_finish
    for index in range(len(block)):  # check right edge
        row = block[index]
        if (len(strip_combining_chars(row)) != width
                or row[-1] not in '+|'):
            detail = 'Right border not aligned or missing.'
            messages.extend(self.malformed_table(block, detail, index))
            return [], messages, blank_finish
    return block, messages, blank_finish

1853

def isolate_simple_table(self):
    """
    Extract the text block of a simple table from the input.

    Scans forward from the top border for border lines; the table ends
    at the second border or at a border that is followed by a blank
    line or the end of input.

    :Return: 3-tuple of the table block (empty list if malformed),
             a list of system messages, and the "blank finish" flag.
    """
    machine = self.state_machine
    start = machine.line_offset
    lines = machine.input_lines
    limit = len(lines) - 1
    toplen = len(lines[start].strip())
    is_border = self.simple_table_border_pat.match
    found = 0
    found_at = None
    for i in range(start + 1, limit + 1):
        line = lines[i]
        if not is_border(line):
            continue
        if len(line.strip()) != toplen:
            machine.next_line(i - start)
            messages = self.malformed_table(
                lines[start:i+1], 'Bottom border or header rule does '
                'not match top border.', i-start)
            return [], messages, i == limit or not lines[i+1].strip()
        found += 1
        found_at = i
        if found == 2 or i == limit or not lines[i+1].strip():
            end = i
            break
    else:  # reached end of input_lines
        details = 'No bottom table border found'
        if found:
            details += ' or no blank line after table bottom'
            machine.next_line(found_at - start)
            block = lines[start:found_at+1]
        else:
            # advance to the last input line
            machine.next_line(limit - start)
            block = lines[start:]
        messages = self.malformed_table(block, details + '.')
        return [], messages, not found
    machine.next_line(end - start)
    block = lines[start:end+1]
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    return block, [], end == limit or not lines[end+1].strip()

1895

def malformed_table(self, block, detail='', offset=0):
    """
    Return a one-element list with an error message for a bad table.

    The message carries `block` (with double-width pad characters
    removed) as literal block; `offset` is added to the block's first
    line number to get the reported line.
    """
    block.replace(self.double_width_pad_char, '')
    data = '\n'.join(block)
    message = 'Malformed table.' + ('\n' + detail if detail else '')
    startline = self.state_machine.abs_line_number() - len(block) + 1
    literal = nodes.literal_block(data, data)
    return [self.reporter.error(message, literal, line=startline+offset)]

1906

def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
    """
    Return a `table` node built from parsed table data.

    `tabledata` is a 3-tuple of column widths, head rows and body rows.
    The first `stub_columns` column specifications are marked as stubs;
    `widths` selects the "colwidths-auto"/"colwidths-given" class.
    """
    colwidths, headrows, bodyrows = tabledata
    table = nodes.table()
    table.source, table.line = (
        self.state_machine.get_source_and_line(tableline))
    if widths == 'auto':
        table['classes'] += ['colwidths-auto']
    elif widths:  # "grid" or list of integers
        table['classes'] += ['colwidths-given']
    tgroup = nodes.tgroup(cols=len(colwidths))
    table += tgroup
    stubs_left = stub_columns
    for colwidth in colwidths:
        colspec = nodes.colspec(colwidth=colwidth)
        if stubs_left:
            colspec.attributes['stub'] = True
            stubs_left -= 1
        tgroup += colspec
    # The head is optional, the body is always created.
    sections = []
    if headrows:
        sections.append((nodes.thead(), headrows))
    sections.append((nodes.tbody(), bodyrows))
    for section, rows in sections:
        tgroup += section
        for row in rows:
            section += self.build_table_row(row, tableline)
    return table

1934

def build_table_row(self, rowdata, tableline):
    """
    Return a `row` node with one `entry` per cell in `rowdata`.

    Cells that are None are skipped. Non-empty cell content is parsed
    into the entry.
    """
    row = nodes.row()
    for cell in rowdata:
        if cell is None:
            continue
        morerows, morecols, offset, cellblock = cell
        spans = (('morerows', morerows), ('morecols', morecols))
        # only non-zero spans become attributes
        entry = nodes.entry(**{key: span for key, span in spans if span})
        row += entry
        if ''.join(cellblock):
            self.nested_parse(cellblock, input_offset=tableline+offset-1,
                              node=entry)
    return row

1952

# Namespace object holding the regular expressions (`explicit.patterns`)
# and the construct dispatch table (`explicit.constructs`) used when
# parsing explicit markup.
explicit = Struct()
"""Patterns and constants used for explicit markup recognition."""

# Regular expressions in verbose mode, compiled when the class body is
# executed.  The ``%(...)s`` placeholders are filled in from the
# attributes of `Inliner` (``vars(Inliner)``).
#
# - ``target``: the name part of a hyperlink target, up to and including
#   the terminating colon; matched in `hyperlink_target()`.
# - ``reference``: a complete hyperlink reference (``name_`` or
#   ```phrase`_``); matched in `is_reference()`.
# - ``substitution``: the name part of a substitution definition, up to
#   and including the closing "|"; matched in `substitution_def()`.
explicit.patterns = Struct(
      target=re.compile(r"""
                        (
                          _ # anonymous target
                        | # *OR*
                          (?!_) # no underscore at the beginning
                          (?P<quote>`?) # optional open quote
                          (?![ `]) # first char. not space or
                          # backquote
                          (?P<name> # reference name
                            .+?
                          )
                          %(non_whitespace_escape_before)s
                          (?P=quote) # close quote if open quote used
                        )
                        (?<!(?<!\x00):) # no unescaped colon at end
                        %(non_whitespace_escape_before)s
                        [ ]? # optional space
                        : # end of reference name
                        ([ ]+|$) # followed by whitespace
                        """ % vars(Inliner), re.VERBOSE),
      reference=re.compile(r"""
                           (
                             (?P<simple>%(simplename)s)_
                           | # *OR*
                             ` # open backquote
                             (?![ ]) # not space
                             (?P<phrase>.+?) # hyperlink phrase
                             %(non_whitespace_escape_before)s
                             `_ # close backquote,
                             # reference mark
                           )
                           $ # end of string
                           """ % vars(Inliner), re.VERBOSE),
      substitution=re.compile(r"""
                              (
                                (?![ ]) # first char. not space
                                (?P<name>.+?) # substitution text
                                %(non_whitespace_escape_before)s
                                \| # close delimiter
                              )
                              ([ ]+|$) # followed by whitespace
                              """ % vars(Inliner),
                              re.VERBOSE),)

2000

def footnote(self, match):
    """
    Parse a footnote; return ``([footnote], blank_finish)``.

    The label (group 1 of `match`) decides whether the footnote is
    auto-numbered ("#", "#label"), auto-symbol ("*") or manually
    numbered; the footnote is registered with the document accordingly.
    """
    machine = self.state_machine
    source, lineno = machine.get_source_and_line()
    (indented, indent, offset, blank_finish
     ) = machine.get_first_known_indented(match.end())
    label = match.group(1)
    name = normalize_name(label)
    node = nodes.footnote('\n'.join(indented))
    node.source = source
    node.line = lineno
    document = self.document
    if name[0] == '#':  # auto-numbered
        name = name[1:]  # autonumber label
        node['auto'] = 1
        if name:
            node['names'].append(name)
        document.note_autofootnote(node)
    elif name == '*':  # auto-symbol
        name = ''
        node['auto'] = '*'
        document.note_symbol_footnote(node)
    else:  # manually numbered
        node += nodes.label('', label)
        node['names'].append(name)
        document.note_footnote(node)
    if name:
        document.note_explicit_target(node, node)
    else:
        document.set_id(node, node)
    if indented:
        self.nested_parse(indented, input_offset=offset, node=node)
    else:
        node += self.reporter.warning('Footnote content expected.')
    return [node], blank_finish

2033

def citation(self, match):
    """
    Parse a citation; return ``([citation], blank_finish)``.

    The label (group 1 of `match`) becomes the citation's `label` child
    and, normalized, its name.
    """
    machine = self.state_machine
    source, lineno = machine.get_source_and_line()
    (indented, indent, offset, blank_finish
     ) = machine.get_first_known_indented(match.end())
    label = match.group(1)
    node = nodes.citation('\n'.join(indented))
    node.source = source
    node.line = lineno
    node += nodes.label('', label)
    node['names'].append(normalize_name(label))
    self.document.note_citation(node)
    self.document.note_explicit_target(node, node)
    if indented:
        self.nested_parse(indented, input_offset=offset, node=node)
    else:
        node += self.reporter.warning('Citation content expected.')
    return [node], blank_finish

2052

def hyperlink_target(self, match):
    """
    Parse a hyperlink target; return ``([target], blank_finish)``.

    The target name may span several lines: lines are concatenated
    until the "target" pattern matches.

    :Exception: `MarkupError` if no line completes the target name.
    """
    pattern = self.explicit.patterns.target
    lineno = self.state_machine.abs_line_number()
    (block, indent, offset, blank_finish
     ) = self.state_machine.get_first_known_indented(
         match.end(), until_blank=True, strip_indent=False)
    blocktext = match.string[:match.end()] + '\n'.join(block)
    block = [escape2null(text_line) for text_line in block]
    escaped = block[0]
    blockindex = 0
    targetmatch = pattern.match(escaped)
    while not targetmatch:
        blockindex += 1
        try:
            escaped += block[blockindex]
        except IndexError:
            raise MarkupError('malformed hyperlink target.')
        targetmatch = pattern.match(escaped)
    # Drop the lines consumed by the name; cut the rest of the name
    # from the line on which it ends.
    del block[:blockindex]
    cut = targetmatch.end() - len(escaped) - 1
    block[0] = (block[0] + ' ')[cut:].strip()
    target = self.make_target(block, blocktext, lineno,
                              targetmatch.group('name'))
    return [target], blank_finish

2077

def make_target(self, block, block_text, lineno, target_name):
    """
    Return a `target` node for the reference given in `block`.

    An indirect target ('refname') is noted with the document; for any
    type other than 'refname' and 'refuri', the data returned by
    `parse_target()` is passed through unchanged.
    """
    target_type, data = self.parse_target(block, block_text, lineno)
    if target_type == 'refname':
        target = nodes.target(block_text, '', refname=normalize_name(data))
        self.add_target(target_name, '', target, lineno)
        self.document.note_indirect_target(target)
        return target
    if target_type == 'refuri':
        target = nodes.target(block_text, '')
        self.add_target(target_name, data, target, lineno)
        return target
    return data

2091

def parse_target(self, block, block_text, lineno):
    """
    Determine the type of reference of a target.

    :Return: A 2-tuple, one of:

      - 'refname' and the indirect reference name
      - 'refuri' and the URI
    """
    maybe_indirect = bool(block) and block[-1].strip().endswith('_')
    if maybe_indirect:
        joined = ' '.join(text_line.strip() for text_line in block)
        refname = self.is_reference(joined)
        if refname:
            return 'refname', refname
    # Whitespace inside each part is removed; only escaped whitespace
    # separates the parts of the URI.
    parts = split_escaped_whitespace(' '.join(block))
    uri = ' '.join(''.join(unescape(part).split()) for part in parts)
    return 'refuri', uri

2111

def is_reference(self, reference):
    """
    Return the unescaped reference name if `reference` is a hyperlink
    reference (simple name or backquoted phrase), else None.
    """
    normalized = whitespace_normalize_name(reference)
    found = self.explicit.patterns.reference.match(normalized)
    if found:
        return unescape(found.group('simple') or found.group('phrase'))
    return None

2118

def add_target(self, targetname, refuri, target, lineno):
    """
    Set line, name and URI of `target` and register it.

    A target with empty `targetname` is flagged and noted as anonymous;
    a named target gets its normalized name and is noted as explicit
    target.

    :Exception: `ApplicationError` if the URI of a named target
                cannot be adjusted.
    """
    target.line = lineno
    if not targetname:  # anonymous target
        if refuri:
            target['refuri'] = refuri
        target['anonymous'] = True
        self.document.note_anonymous_target(target)
        return
    target['names'].append(normalize_name(unescape(targetname)))
    if refuri:
        uri = self.inliner.adjust_uri(refuri)
        if not uri:
            raise ApplicationError('problem with URI: %r' % refuri)
        target['refuri'] = uri
    self.document.note_explicit_target(target, self.parent)

2136

def substitution_def(self, match):
    """
    Parse a substitution definition.

    :Return: 2-tuple of a node list (the `substitution_definition`
             or a system message) and the "blank finish" flag.
    :Exception: `MarkupError` if the substitution name is not closed.
    """
    pattern = self.explicit.patterns.substitution
    src, srcline = self.state_machine.get_source_and_line()
    (block, indent, offset, blank_finish
     ) = self.state_machine.get_first_known_indented(match.end(),
                                                     strip_indent=False)
    blocktext = match.string[:match.end()] + '\n'.join(block)
    block.disconnect()
    # The name may span several lines: join lines until it is complete.
    escaped = escape2null(block[0].rstrip())
    blockindex = 0
    subdefmatch = pattern.match(escaped)
    while not subdefmatch:
        blockindex += 1
        try:
            escaped = escaped + ' ' + escape2null(
                                          block[blockindex].strip())
        except IndexError:
            raise MarkupError('malformed substitution definition.')
        subdefmatch = pattern.match(escaped)
    del block[:blockindex]  # strip out the substitution marker
    start = subdefmatch.end()-len(escaped)-1
    block[0] = (block[0].strip() + ' ')[start:-1]
    if not block[0]:
        del block[0]
        offset += 1
    while block and not block[-1].strip():
        block.pop()
    subname = subdefmatch.group('name')
    substitution_node = nodes.substitution_definition(blocktext)
    substitution_node.source = src
    substitution_node.line = srcline
    if not block:
        msg = self.reporter.warning(
            'Substitution definition "%s" missing contents.' % subname,
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline)
        return [msg], blank_finish
    block[0] = block[0].strip()
    substitution_node['names'].append(
        nodes.whitespace_normalize_name(subname))
    new_abs_offset, blank_finish = self.nested_list_parse(
        block, input_offset=offset, node=substitution_node,
        initial_state='SubstitutionDef', blank_finish=blank_finish)
    # Move everything but inline elements and text to the parent.
    position = 0
    for child in substitution_node[:]:
        if isinstance(child, (nodes.Inline, nodes.Text)):
            position += 1
        else:
            self.parent += substitution_node[position]
            del substitution_node[position]
    for element in substitution_node.findall(nodes.Element,
                                             include_self=False):
        if isinstance(element, nodes.problematic):
            msg = self.reporter.error(
                'Problematic content in substitution definition',
                nodes.literal_block('', blocktext),
                source=src, line=srcline)
            msg.append(nodes.block_quote(
                '', nodes.paragraph('', '', *substitution_node.children)))
            return [msg], blank_finish
        illegal = self.disallowed_inside_substitution_definitions(element)
        if illegal:
            msg = self.reporter.error(f'{illegal} are not supported in '
                                      'a substitution definition.',
                                      nodes.literal_block('', blocktext),
                                      source=src, line=srcline)
            return [msg], blank_finish
    if len(substitution_node) == 0:
        msg = self.reporter.warning(
            'Substitution definition "%s" empty or invalid.' % subname,
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline)
        return [msg], blank_finish
    self.document.note_substitution_def(
        substitution_node, subname, self.parent)
    return [substitution_node], blank_finish

2215

def disallowed_inside_substitution_definitions(self, node) -> str:
    """
    Return a description of the unsupported feature used by `node`,
    or the empty string if `node` is fine.
    """
    if isinstance(node, nodes.reference) and node.get('anonymous'):
        return 'Anonymous references'
    if isinstance(node, nodes.footnote_reference) and node.get('auto'):
        return 'References to auto-numbered and auto-symbol footnotes'
    has_target = node['names'] or node['ids']
    return 'Targets (names and identifiers)' if has_target else ''

2225

def directive(self, match, **option_presets):
    """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
    type_name = match.group(1)
    directive_class, messages = directives.directive(
        type_name, self.memo.language, self.document)
    self.parent += messages
    if not directive_class:
        return self.unknown_directive(type_name)
    return self.run_directive(
        directive_class, match, type_name, option_presets)

2237

def run_directive(self, directive, match, type_name, option_presets):
    """
    Parse a directive then run its directive function.

    Parameters:

    - `directive`: The class implementing the directive.  Must be
      a subclass of `rst.Directive`.  (Functions and methods are
      converted with `convert_directive_function()`.)

    - `match`: A regular expression match object which matched the first
      line of the directive.

    - `type_name`: The directive name, as used in the source text.

    - `option_presets`: A dictionary of preset options, defaults for the
      directive options.  Currently, only an "alt" option is passed by
      substitution definitions (value: the substitution name), which may
      be used by an embedded image directive.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    if isinstance(directive, (FunctionType, MethodType)):
        from docutils.parsers.rst import convert_directive_function
        directive = convert_directive_function(directive)
    machine = self.state_machine
    lineno = machine.abs_line_number()
    first_offset = machine.line_offset
    (indented, indent, line_offset, blank_finish
     ) = machine.get_first_known_indented(match.end(), strip_top=0)
    last_offset = machine.line_offset
    block_text = '\n'.join(machine.input_lines[first_offset:last_offset + 1])
    try:
        arguments, options, content, content_offset = (
            self.parse_directive_block(indented, line_offset,
                                       directive, option_presets))
    except MarkupError as detail:
        error = self.reporter.error(
            'Error in "%s" directive:\n%s.' % (type_name,
                                               ' '.join(detail.args)),
            nodes.literal_block(block_text, block_text), line=lineno)
        return [error], blank_finish
    directive_instance = directive(
        type_name, arguments, options, content, lineno,
        content_offset, block_text, self, self.state_machine)
    try:
        result = directive_instance.run()
    except docutils.parsers.rst.DirectiveError as error:
        msg_node = self.reporter.system_message(error.level, error.msg,
                                                line=lineno)
        msg_node += nodes.literal_block(block_text, block_text)
        result = [msg_node]
    assert isinstance(result, list), \
        'Directive "%s" must return a list of nodes.' % type_name
    for index, item in enumerate(result):
        assert isinstance(item, nodes.Node), \
            ('Directive "%s" returned non-Node object (index %s): %r'
             % (type_name, index, item))
    return (result,
            blank_finish or self.state_machine.is_next_line_blank())

2297

def parse_directive_block(self, indented, line_offset, directive,
                          option_presets):
    """
    Split the directive block into arguments, options and content.

    :Return: 4-tuple ``(arguments, options, content, content_offset)``.
    :Exception: `MarkupError` if there is content but the directive
                does not permit any.
    """
    option_spec = directive.option_spec
    has_content = directive.has_content
    takes_arguments = (directive.required_arguments
                       or directive.optional_arguments)
    if indented and not indented[0].strip():
        indented.trim_start()
        line_offset += 1
    while indented and not indented[-1].strip():
        indented.trim_end()
    if indented and (takes_arguments or option_spec):
        # Arguments and options end at the first blank line
        # (or at the end of the block).
        i = next((index for index, text_line in enumerate(indented)
                  if not text_line.strip()), len(indented))
        arg_block = indented[:i]
        content = indented[i+1:]
        content_offset = line_offset + i + 1
    else:
        content = indented
        content_offset = line_offset
        arg_block = []
    if option_spec:
        options, arg_block = self.parse_directive_options(
            option_presets, option_spec, arg_block)
    else:
        options = {}
    if arg_block and not takes_arguments:
        # No arguments accepted: the remaining lines are content.
        content = arg_block + indented[i:]
        content_offset = line_offset
        arg_block = []
    while content and not content[0].strip():
        content.trim_start()
        content_offset += 1
    if takes_arguments:
        arguments = self.parse_directive_arguments(directive, arg_block)
    else:
        arguments = []
    if content and not has_content:
        raise MarkupError('no content permitted')
    return arguments, options, content, content_offset

2343

def parse_directive_options(self, option_presets, option_spec, arg_block):
    """
    Separate and parse the option lines of `arg_block`.

    Options start at the first line matching the "field_marker"
    pattern.

    :Return: 2-tuple of the options dictionary (presets updated with
             the parsed options) and the remaining argument block.
    :Exception: `MarkupError` if the option block cannot be parsed.
    """
    options = option_presets.copy()
    opt_block = []
    for index, text_line in enumerate(arg_block):
        if re.match(Body.patterns['field_marker'], text_line):
            opt_block, arg_block = arg_block[index:], arg_block[:index]
            break
    if opt_block:
        success, data = self.parse_extension_options(option_spec,
                                                     opt_block)
        if not success:  # data is an error string
            raise MarkupError(data)
        options.update(data)  # data is a dict of options
    return options, arg_block

2361

def parse_directive_arguments(self, directive, arg_block):
    """
    Return the list of directive arguments found in `arg_block`.

    The text is split at whitespace.  Surplus words are kept in the
    last argument if `directive.final_argument_whitespace` is true.

    :Exception: `MarkupError` for too few or too many arguments.
    """
    required = directive.required_arguments
    maximum = required + directive.optional_arguments
    arg_text = '\n'.join(arg_block)
    arguments = arg_text.split()
    supplied = len(arguments)
    if supplied < required:
        raise MarkupError('%s argument(s) required, %s supplied'
                          % (required, supplied))
    if supplied > maximum:
        if not directive.final_argument_whitespace:
            raise MarkupError(
                'maximum %s argument(s) allowed, %s supplied'
                % (maximum, supplied))
        arguments = arg_text.split(None, maximum - 1)
    return arguments

2378

def parse_extension_options(self, option_spec, datalines):
    """
    Parse `datalines` for a field list containing extension options
    matching `option_spec`.

    :Parameters:
        - `option_spec`: a mapping of option name to conversion
          function, which should raise an exception on bad input.
        - `datalines`: a list of input strings.

    :Return:
        - Success value, 1 or 0.
        - An option dictionary on success, an error string on failure.
    """
    field_list = nodes.field_list()
    newline_offset, blank_finish = self.nested_list_parse(
        datalines, 0, field_list, initial_state='ExtensionOptions',
        blank_finish=True)
    if newline_offset != len(datalines):  # incomplete parse of block
        return 0, 'invalid option block'
    try:
        options = utils.extract_extension_options(field_list, option_spec)
    except KeyError as detail:
        return 0, 'unknown option: "%s"' % detail.args[0]
    except (ValueError, TypeError) as detail:
        return 0, 'invalid option value: %s' % ' '.join(detail.args)
    except utils.ExtensionOptionError as detail:
        return 0, 'invalid option data: %s' % ' '.join(detail.args)
    if not blank_finish:
        return 0, 'option data incompletely parsed'
    return 1, options

2411

def unknown_directive(self, type_name):
    """
    Return an error message (with the directive block as literal block)
    for an unknown directive, and the "blank finish" flag.
    """
    machine = self.state_machine
    lineno = machine.abs_line_number()
    indented, indent, offset, blank_finish = (
        machine.get_first_known_indented(0, strip_indent=False))
    text = '\n'.join(indented)
    literal = nodes.literal_block(text, text)
    error = self.reporter.error('Unknown directive type "%s".' % type_name,
                                literal, line=lineno)
    return [error], blank_finish

2421

def comment(self, match):
    """
    Parse a comment; return a node list and the "blank finish" flag.

    A comment start followed by a blank line is either an empty comment
    or an 'end of inclusion from "..."' marker; the latter pops the
    document's include log and produces no node.
    """
    if self.state_machine.is_next_line_blank():
        first_comment_line = match.string[match.end():]
        if not first_comment_line.strip():  # empty comment
            return [nodes.comment()], True  # "A tiny but practical wart."
        if first_comment_line.startswith('end of inclusion from "'):
            # cf. parsers.rst.directives.misc.Include
            self.document.include_log.pop()
            return [], True
    indented, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(match.end()))
    # drop trailing blank lines
    while indented and not indented[-1].strip():
        indented.trim_end()
    text = '\n'.join(indented)
    return [nodes.comment(text, text)], blank_finish

2437

# Dispatch table for explicit markup: a list of (method, pattern) pairs.
# `explicit_construct()` tries the patterns in list order against the
# complete first line and calls the method of the first pattern that
# matches; if none matches, the block is treated as a comment.
# The ``%s`` placeholders are replaced by `Inliner.simplename`.
explicit.constructs = [
      (footnote,
       re.compile(r"""
                  \.\.[ ]+ # explicit markup start
                  \[
                  ( # footnote label:
                      [0-9]+ # manually numbered footnote
                    | # *OR*
                      \# # anonymous auto-numbered footnote
                    | # *OR*
                      \#%s # auto-number ed?) footnote label
                    | # *OR*
                      \* # auto-symbol footnote
                  )
                  \]
                  ([ ]+|$) # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE)),
      (citation,
       re.compile(r"""
                  \.\.[ ]+ # explicit markup start
                  \[(%s)\] # citation label
                  ([ ]+|$) # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE)),
      (hyperlink_target,
       re.compile(r"""
                  \.\.[ ]+ # explicit markup start
                  _ # target indicator
                  (?![ ]|$) # first char. not space or EOL
                  """, re.VERBOSE)),
      (substitution_def,
       re.compile(r"""
                  \.\.[ ]+ # explicit markup start
                  \| # substitution indicator
                  (?![ ]|$) # first char. not space or EOL
                  """, re.VERBOSE)),
      (directive,
       re.compile(r"""
                  \.\.[ ]+ # explicit markup start
                  (%s) # directive name
                  [ ]? # optional space
                  :: # directive delimiter
                  ([ ]+|$) # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE))]

2481

def explicit_markup(self, match, context, next_state):
    """Footnotes, hyperlink targets, directives, comments."""
    new_nodes, blank_finish = self.explicit_construct(match)
    self.parent += new_nodes
    # parse directly following explicit markup constructs
    self.explicit_list(blank_finish)
    return [], next_state, []

2488

def explicit_construct(self, match):
    """
    Determine which explicit construct this is, parse & return it.

    If the first matching construct raises a `MarkupError` (or no
    construct matches), the block is parsed as a comment; the error is
    reported as a warning after the comment node.
    """
    errors = []
    for method, pattern in self.explicit.constructs:
        expmatch = pattern.match(match.string)
        if not expmatch:
            continue
        try:
            return method(self, expmatch)
        except MarkupError as error:
            warning = self.reporter.warning(
                ' '.join(error.args),
                line=self.state_machine.abs_line_number())
            errors.append(warning)
            break
    nodelist, blank_finish = self.comment(match)
    return nodelist + errors, blank_finish

2504

def explicit_list(self, blank_finish) -> None:
    """
    Create a nested state machine for a series of explicit markup
    constructs (including anonymous hyperlink targets).
    """
    machine = self.state_machine
    following = machine.line_offset + 1  # next line
    newline_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[following:],
        input_offset=machine.abs_line_offset() + 1,
        node=self.parent, initial_state='Explicit',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if blank_finish:
        return
    self.parent += self.unindent_warning('Explicit markup')

2519

def anonymous(self, match, context, next_state):
    """Anonymous hyperlink targets."""
    new_nodes, blank_finish = self.anonymous_target(match)
    self.parent += new_nodes
    # parse directly following explicit markup constructs
    self.explicit_list(blank_finish)
    return [], next_state, []

2526

def anonymous_target(self, match):
    """
    Parse an anonymous hyperlink target.

    :Return: 2-tuple of a one-element list with the result of
             `make_target()`, and the "blank finish" flag.
    """
    lineno = self.state_machine.abs_line_number()
    indented, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(match.end(),
                                                    until_blank=True))
    blocktext = match.string[:match.end()] + '\n'.join(indented)
    escaped_block = [escape2null(text_line) for text_line in indented]
    target = self.make_target(escaped_block, blocktext, lineno, '')
    return [target], blank_finish

2536

def line(self, match, context, next_state):
    """Section title overline or transition marker."""
    machine = self.state_machine
    if machine.match_titles:
        return [match.string], 'Line', []
    stripped = match.string.strip()
    if stripped == '::':
        raise statemachine.TransitionCorrection('text')
    if len(stripped) < 4:
        msg = self.reporter.info(
            'Unexpected possible title overline or transition.\n'
            "Treating it as ordinary text because it's so short.",
            line=machine.abs_line_number())
        self.parent += msg
        raise statemachine.TransitionCorrection('text')
    # Titles are not accepted here and the line is too long to be
    # treated as ordinary text: report an error.
    blocktext = machine.line
    msg = self.reporter.error(
        'Unexpected section title or transition.',
        nodes.literal_block(blocktext, blocktext),
        line=machine.abs_line_number())
    self.parent += msg
    return [], next_state, []

2558

def text(self, match, context, next_state):
    """
    Titles, definition lists, paragraphs.

    Pass the matched line as the new context to the 'Text' state.
    """
    first_line = match.string
    return [first_line], 'Text', []

2562

2563

class RFC2822Body(Body):

    """
    `Body` variant that also recognizes RFC2822-style headers.

    RFC2822 headers are only valid as the first constructs in documents.
    Every transition listed here leads to the `Body` state, so as soon as
    anything else appears, `Body` takes over.
    """

    # Work on a copy: the original `Body.patterns` must not be modified.
    patterns = Body.patterns.copy()
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    # Slot the new transition in just before the last entry ('text').
    initial_transitions[-1:-1] = [('rfc2822', 'Body')]

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item (first field of a new list)."""
        field_list = nodes.field_list(classes=['rfc2822'])
        self.parent += field_list
        first_field, blank_finish = self.rfc2822_field(match)
        field_list += first_field
        state_machine = self.state_machine
        following = state_machine.line_offset + 1  # next line
        # Remaining fields are collected by the 'RFC2822List' state.
        newline_offset, blank_finish = self.nested_list_parse(
            state_machine.input_lines[following:],
            input_offset=state_machine.abs_line_offset() + 1,
            node=field_list, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """
        Return a tuple (field node, blank_finish) for one header line.

        The field name is the text before the first colon; the field body
        is parsed from the indented block following the marker.
        """
        header = match.string
        name = header[:header.find(':')]
        body_lines, _indent, line_offset, blank_finish = (
            self.state_machine.get_first_known_indented(match.end(),
                                                        until_blank=True))
        field = nodes.field()
        field += nodes.field_name(name, name)
        body = nodes.field_body('\n'.join(body_lines))
        field += body
        if body_lines:
            self.nested_parse(body_lines, input_offset=line_offset,
                              node=body)
        return field, blank_finish

2608

2609

class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.

    Compound elements are lists and list-like constructs.

    Every transition method is disabled here, i.e. redefined as
    `invalid_input`.  Subclasses override individual methods to re-enable
    them.

    Example: once an initial bullet list item is recognized, `Body.bullet`
    calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`),
    which starts a nested parsing session with `BulletList` as the initial
    state and a "bullet_list" node as its container.  Only the ``bullet``
    transition method is enabled in `BulletList`; as long as only bullet
    list items are encountered, they are parsed and inserted into the
    container.  The first construct which is *not* a bullet list item
    triggers `invalid_input`, which ends the nested parse and closes the
    container.  `BulletList` needs to recognize input that is invalid in
    the context of a bullet list, which means everything *other than*
    bullet list items, so it inherits the transition list created in
    `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        # Back up one line so the parent state machine can reassess it.
        self.state_machine.previous_line()
        raise EOFError

    # All inherited transition methods abort the nested parse by default.
    indent = bullet = enumerator = field_marker = invalid_input
    option_marker = doctest = line_block = invalid_input
    grid_table_top = simple_table_top = invalid_input
    explicit_markup = anonymous = line = text = invalid_input

2652

2653

class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Bullet list item."""
        marker = match.string[0]
        if marker != self.parent['bullet']:
            # A different bullet character starts a new list.
            self.invalid_input()
        item, finished_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = finished_blank
        return [], next_state, []

2667

2668

class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """Definition lists: pass the line to the 'Definition' state."""
        term_line = match.string
        return [term_line], 'Definition', []

2676

2677

class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """Enumerated list item."""
        fmt, sequence, _text, ordinal = self.parse_enumerator(
            match, self.parent['enumtype'])
        # Decide whether this enumerator belongs to a different list.
        # The checks run in the same order, with the same short-circuiting,
        # as a single ``A or B or C`` expression.
        if fmt != self.format:
            new_list = True
        elif sequence != '#' and (sequence != self.parent['enumtype']
                                  or self.auto
                                  or ordinal != (self.lastordinal + 1)):
            new_list = True
        else:
            new_list = not self.is_enumerated_list_item(
                ordinal, sequence, fmt)
        if new_list:
            # different enumeration: new list
            self.invalid_input()
        if sequence == '#':
            self.auto = 1
        item, finished_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = finished_blank
        self.lastordinal = ordinal
        return [], next_state, []

2700

2701

class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Field list field: parse it and add it to the parent node."""
        new_field, finished_blank = self.field(match)
        self.parent += new_field
        self.blank_finish = finished_blank
        return [], next_state, []

2712

2713

class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """Option list item."""
        try:
            item, finished_blank = self.option_list_item(match)
        except MarkupError:
            # Not a valid option list item: end this list.
            self.invalid_input()
        self.parent += item
        self.blank_finish = finished_blank
        return [], next_state, []

2727

2728

class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    # Use the patterns and transitions that include 'rfc2822'.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        new_field, finished_blank = self.rfc2822_field(match)
        self.parent += new_field
        self.blank_finish = finished_blank
        return [], 'RFC2822List', []

    # A blank line is handled like any other invalid input.
    blank = SpecializedBody.invalid_input

2744

2745

class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node) -> None:
        """
        Override `Body.parse_field_body` for simpler parsing.

        Each run of consecutive non-blank lines is added to `node` as one
        paragraph holding the unparsed text.
        """
        pending = []
        # The trailing empty string flushes the final run of lines.
        for current in [*indented, '']:
            if current.strip():
                pending.append(current)
                continue
            if pending:
                text = '\n'.join(pending)
                node += nodes.paragraph(text, text)
                pending = []

2764

2765

class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    # A blank line is handled as invalid input.
    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """New line of line block."""
        current_lineno = self.state_machine.abs_line_number()
        new_line, messages, finished_blank = self.line_block_line(
            match, current_lineno)
        self.parent += new_line
        # Messages go to the parent of the line block.
        self.parent.parent += messages
        self.blank_finish = finished_blank
        return [], next_state, []

2780

2781

class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments."""
        constructs, finished_blank = self.explicit_construct(match)
        self.parent += constructs
        self.blank_finish = finished_blank
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets."""
        targets, finished_blank = self.anonymous_target(match)
        self.parent += targets
        self.blank_finish = finished_blank
        return [], next_state, []

    # A blank line is handled as invalid input.
    blank = SpecializedBody.invalid_input

2801

2802

class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
        'embedded_directive': re.compile(r'(%s)::( +|$)'
                                         % Inliner.simplename),
        'text': r'',
    }
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """
        Parse the embedded directive and add its nodes to the parent.

        Always ends this state machine by raising `EOFError`.
        """
        substitution_name = self.parent['names'][0]
        directive_nodes, finished_blank = self.directive(
            match, alt=substitution_name)
        self.parent += directive_nodes
        if not self.state_machine.at_eof():
            self.blank_finish = finished_blank
        raise EOFError

    def text(self, match, context, next_state):
        """
        Record whether the next line is blank (unless at the end of the
        input), then end this state machine by raising `EOFError`.
        """
        state_machine = self.state_machine
        if not state_machine.at_eof():
            self.blank_finish = state_machine.is_next_line_blank()
        raise EOFError

2827

2828

class Text(RSTState):

    """
    Classifier of second line of a text block.

    Could be a paragraph, a definition list item, or a title.
    """

    patterns = {'underline': Body.patterns['line'],
                'text': r''}
    initial_transitions = [('underline', 'Body'), ('text', 'Body')]

    def blank(self, match, context, next_state):
        """End of paragraph."""
        # The paragraph started on the line before the blank line.
        start_lineno = self.state_machine.abs_line_number() - 1
        # NOTE: self.paragraph returns [node, system_message(s)], literalnext
        paragraph, literalnext = self.paragraph(context, start_lineno)
        self.parent += paragraph
        if literalnext:
            self.parent += self.literal_block()
        return [], 'Body', []

    def eof(self, context):
        """Finish a pending paragraph (if any) at the end of the input."""
        if context:
            self.blank(None, context, None)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        state_machine = self.state_machine
        definition_list = nodes.definition_list()
        # the definition list starts on the line before the indent:
        start_lineno = state_machine.abs_line_number() - 1
        (definition_list.source,
         definition_list.line) = state_machine.get_source_and_line(
            start_lineno)
        first_item, blank_finish = self.definition_list_item(context)
        definition_list += first_item
        self.parent += definition_list
        following = state_machine.line_offset + 1  # next line
        # Further items are collected by the 'DefinitionList' state.
        newline_offset, blank_finish = self.nested_list_parse(
            state_machine.input_lines[following:],
            input_offset=state_machine.abs_line_offset() + 1,
            node=definition_list, initial_state='DefinitionList',
            blank_finish=blank_finish, blank_finish_state='Definition')
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Definition list')
        return [], 'Body', []

    def underline(self, match, context, next_state):
        """Section title."""
        lineno = self.state_machine.abs_line_number()
        title = context[0].rstrip()
        rule = match.string.rstrip()
        source = title + '\n' + rule
        messages = []
        if column_width(title) > len(rule):
            if len(rule) < 4:
                # Too short to be a title underline: treat as text.
                if self.state_machine.match_titles:
                    msg = self.reporter.info(
                        'Possible title underline, too short for the title.\n'
                        "Treating it as ordinary text because it's so short.",
                        line=lineno)
                    self.parent += msg
                raise statemachine.TransitionCorrection('text')
            blocktext = context[0] + '\n' + self.state_machine.line
            msg = self.reporter.warning(
                'Title underline too short.',
                nodes.literal_block(blocktext, blocktext),
                line=lineno)
            messages.append(msg)
        if not self.state_machine.match_titles:
            blocktext = context[0] + '\n' + self.state_machine.line
            # We need get_source_and_line() here to report correctly
            src, srcline = self.state_machine.get_source_and_line()
            # TODO: why is abs_line_number() == srcline+1
            # if the error is in a table (try with test_tables.py)?
            msg = self.reporter.error(
                'Unexpected section title.',
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            self.parent += messages
            self.parent += msg
            return [], next_state, []
        style = rule[0]
        del context[:]
        self.section(title, source, style, lineno - 1, messages)
        return [], next_state, []

    def text(self, match, context, next_state):
        """Paragraph."""
        startline = self.state_machine.abs_line_number() - 1
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            # The exception carries the text block read so far.
            block, src, srcline = err.args
            msg = self.reporter.error('Unexpected indentation.',
                                      source=src, line=srcline)
        else:
            msg = None
        lines = context + list(block)
        paragraph, literalnext = self.paragraph(lines, startline)
        self.parent += paragraph
        self.parent += msg  # `msg` is None if there was no error
        if literalnext:
            try:
                self.state_machine.next_line()
            except EOFError:
                pass
            self.parent += self.literal_block()
        return [], next_state, []

    def literal_block(self):
        """Return a list of nodes."""
        (indented, _indent, offset, blank_finish
         ) = self.state_machine.get_indented()
        # Drop trailing blank lines.
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            # No indented text: try a quoted literal block instead.
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        block_node = nodes.literal_block(data, data)
        (block_node.source,
         block_node.line) = self.state_machine.get_source_and_line(offset+1)
        result = [block_node]
        if not blank_finish:
            result.append(self.unindent_warning('Literal block'))
        return result

    def quoted_literal_block(self):
        """
        Parse a quoted literal block in a nested state machine.

        Return the children of a temporary container element.
        """
        state_machine = self.state_machine
        abs_line_offset = state_machine.abs_line_offset()
        remaining = state_machine.input_lines[state_machine.line_offset:]
        container = nodes.Element()
        nested_kwargs = {'state_classes': (QuotedLiteralBlock,),
                         'initial_state': 'QuotedLiteralBlock'}
        new_abs_offset = self.nested_parse(
            remaining,
            input_offset=abs_line_offset, node=container, match_titles=False,
            state_machine_kwargs=nested_kwargs)
        self.goto_line(new_abs_offset)
        return container.children

    def definition_list_item(self, termline):
        """Return a tuple (definition_list_item node, blank_finish)."""
        # the parser is already on the second (indented) line:
        dd_lineno = self.state_machine.abs_line_number()
        dt_lineno = dd_lineno - 1
        (indented, _indent, line_offset, blank_finish
         ) = self.state_machine.get_indented()
        rawsource = '\n'.join(termline + list(indented))
        item = nodes.definition_list_item(rawsource)
        (item.source,
         item.line) = self.state_machine.get_source_and_line(dt_lineno)
        term_nodes, messages = self.term(termline, dt_lineno)
        item += term_nodes
        definition = nodes.definition('', *messages)
        (definition.source,
         definition.line) = self.state_machine.get_source_and_line(dd_lineno)
        item += definition
        if termline[0][-2:] == '::':
            definition += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.',
                line=dd_lineno)
        # TODO: drop a definition if it is an empty comment to allow
        # definition list items with several terms?
        # https://sourceforge.net/p/docutils/feature-requests/60/
        self.nested_parse(indented, input_offset=line_offset,
                          node=definition)
        return item, blank_finish

    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """Return a definition_list's term and optional classifiers."""
        assert len(lines) == 1
        term_line = lines[0]
        text_nodes, messages = self.inline_text(term_line, lineno)
        term_node = nodes.term(term_line)
        (term_node.source,
         term_node.line) = self.state_machine.get_source_and_line(lineno)
        node_list = [term_node]
        for node in text_nodes:
            if not isinstance(node, nodes.Text):
                node_list[-1] += node
                continue
            # Text nodes may contain " : " classifier delimiters.
            head, *classifiers = self.classifier_delimiter.split(node)
            if not classifiers:
                node_list[-1] += node
                continue
            node_list[-1] += nodes.Text(head.rstrip())
            node_list.extend(
                [nodes.classifier(unescape(part, True), part)
                 for part in classifiers])
        return node_list, messages

3020

3021

class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    Every transition method is disabled here (it raises `EOFError`).
    Subclasses override individual methods to re-enable them.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    # All transition methods abort the state machine by default.
    blank = indent = underline = text = invalid_input

3043

3044

class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition."""
        # Back up two lines so the parent state machine can reassess.
        self.state_machine.previous_line(2)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        item, finished_blank = self.definition_list_item(context)
        self.parent += item
        self.blank_finish = finished_blank
        return [], 'DefinitionList', []

3060

3061

class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1  # ignored, will be removed in Docutils 2.0.

    def eof(self, context):
        """Transition marker at end of section or document."""
        if len(context[0].strip()) < 4:
            # Too short for a transition: re-parse as ordinary text.
            self.state_correction(context)
        src, srcline = self.state_machine.get_source_and_line()
        transition = nodes.transition(rawsource=context[0])
        transition.source = src
        # The marker is on the line before the reported one.
        transition.line = srcline - 1
        self.parent += transition
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            # Too short for a transition: re-parse as ordinary text.
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        # The marker is on the line before the blank line.
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """Potential over- & underlined title."""
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        # A short overline (trailing whitespace not counted) is handed to
        # `short_overline()` instead of being reported as a problem.
        short = len(overline.rstrip()) < 4
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            blocktext = overline + '\n' + title
            if short:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.error(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '\n'.join((overline, title, underline))
        overline = overline.rstrip()
        underline = underline.rstrip()

        # Check that the underline is a valid line and matches the overline.
        if not self.transitions['underline'][0].match(underline):
            problem = 'Missing matching underline for section title overline.'
        elif overline != underline:
            problem = 'Title overline & underline mismatch.'
        else:
            problem = None
        if problem is not None:
            blocktext = overline + '\n' + title + '\n' + underline
            if short:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.error(
                    problem,
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []

        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if short:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno)
                messages.append(msg)
        style = (overline[0], underline[0])
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        return [], 'Body', []

    indent = text  # indented title

    def underline(self, match, context, next_state):
        """A second line-like line directly after the overline: an error."""
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1) -> None:
        """
        Add an INFO message for a short overline and re-parse it as text.

        `blocktext` is accepted but not used.
        """
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """
        Go back `lines` lines, clear `context`, and raise
        `statemachine.StateCorrection` for state 'Body', transition 'text'.
        """
        self.state_machine.previous_line(lines)
        del context[:]
        raise statemachine.StateCorrection('Body', 'text')

3183

3184

class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose. Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False) -> None:
        RSTState.__init__(self, state_machine, debug)
        # System messages collected while parsing; added to the parent
        # in `eof()`.
        self.messages = []
        # Line number of the first quoted line; set in `initial_quoted()`.
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        """End the block at a blank line once quoted lines were collected."""
        if not context:
            return context, next_state, []
        raise EOFError

    def eof(self, context):
        """Add the literal block (or a warning) and pending messages."""
        if context:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            block_node = nodes.literal_block(text, text)
            block_node.source = src
            block_node.line = srcline
            self.parent += block_node
        else:
            # src not available, statemachine.input_lines is empty
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number())
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        """Indented line after quoted lines: record an error and stop."""
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        error = self.reporter.error(
            'Unexpected indentation.',
            line=self.state_machine.abs_line_number())
        self.messages.append(error)
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote_char = match.string[0]
        quote_pattern = re.compile(re.escape(quote_char))
        # New transition matches consistent quotes only:
        transition = (quote_pattern, self.quoted, self.__class__.__name__)
        self.add_transition('quoted', transition)
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        """Unquoted line: record an error if inside a block, then stop."""
        if context:
            error = self.reporter.error(
                'Inconsistent literal block quoting.',
                line=self.state_machine.abs_line_number())
            self.messages.append(error)
            self.state_machine.previous_line()
        raise EOFError

3258

3259

state_classes = (
    Body,
    BulletList,
    DefinitionList,
    EnumeratedList,
    FieldList,
    OptionList,
    LineBlock,
    ExtensionOptions,
    Explicit,
    Text,
    Definition,
    Line,
    SubstitutionDef,
    RFC2822Body,
    RFC2822List,
)
"""Standard set of State classes used to start `RSTStateMachine`."""