1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
6This is the ``docutils.parsers.rst.states`` module, the core of
7the reStructuredText parser. It defines the following:
8
9:Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
    - `BulletList`: Second and subsequent bullet_list list_items.
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: obsolete, use `types.SimpleNamespace`.
30
31:Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
35
36:Functions:
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
39
40:Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
42
43Parser Overview
44===============
45
46The reStructuredText parser is implemented as a recursive state machine,
47examining its input one line at a time. To understand how the parser works,
48please first become familiar with the `docutils.statemachine` module. In the
49description below, references are made to classes defined in this module;
50please see the individual classes for details.
51
52Parsing proceeds as follows:
53
541. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
58
592. The method associated with the matched transition pattern is called.
60
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
65
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
70
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
73
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
83
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
86
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
90
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
93
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
97
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
101"""
102
103from __future__ import annotations
104
105__docformat__ = 'reStructuredText'
106
107import re
108from types import FunctionType, MethodType
109from types import SimpleNamespace as Struct
110import warnings
111
112from docutils import nodes, statemachine, utils
113from docutils import ApplicationError, DataError
114from docutils.statemachine import StateMachineWS, StateWS
115from docutils.nodes import fully_normalize_name as normalize_name
116from docutils.nodes import unescape, whitespace_normalize_name
117import docutils.parsers.rst
118from docutils.parsers.rst import directives, languages, tableparser, roles
119from docutils.utils import escape2null, column_width, strip_combining_chars
120from docutils.utils import punctuation_chars, urischemes
121from docutils.utils import split_escaped_whitespace
122from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
123 RomanNumeral)
124
125TYPE_CHECKING = False
126if TYPE_CHECKING:
127 from docutils.statemachine import StringList
128
129
class MarkupError(DataError):
    """Data error caused by invalid reStructuredText markup."""


class UnknownInterpretedRoleError(DataError):
    """No interpreted text role with the given name is registered."""


class InterpretedRoleNotImplementedError(DataError):
    """The interpreted text role is recognized but not implemented."""


class ParserError(ApplicationError):
    """Error in the operation of the reStructuredText parser."""


class MarkupMismatch(Exception):
    """Internal signal: matched markup turned out not to fit the construct."""
135
136
class RSTStateMachine(StateMachineWS):

    """
    reStructuredText's master StateMachine.

    The entry point to reStructuredText parsing is the `run()` method.
    """
    section_level_offset: int = 0
    """Correction term for section level determination in nested parsing.

    Updated by `RSTState.nested_parse()` and used in
    `RSTState.check_subsection()` to compensate differences when
    nested parsing uses a detached base node with a document-wide
    section title style hierarchy or the current node with a new,
    independent title style hierarchy.
    """

    def run(self, input_lines, document, input_offset=0, match_titles=True,
            inliner=None) -> None:
        """
        Parse `input_lines` and modify the `document` node in place.

        Extend `StateMachineWS.run()`: set up parse-global data and
        run the StateMachine.
        """
        if inliner is None:
            inliner = Inliner()
        inliner.init_customizations(document.settings)
        self.document = document
        self.reporter = document.reporter
        self.node = document
        self.match_titles = match_titles
        self.language = languages.get_language(
            document.settings.language_code, document.reporter)
        self.attach_observer(document.note_source)
        # Parse-global data shared with nested parsers.  The attributes
        # `reporter`, `section_level`, and `section_bubble_up_kludge`
        # are obsolete and will be removed in Docutils 2.0.
        self.memo = Struct(document=document,
                           reporter=document.reporter,  # ignored
                           language=self.language,
                           title_styles=[],
                           section_level=0,  # ignored
                           section_bubble_up_kludge=False,  # ignored
                           inliner=inliner)
        result = StateMachineWS.run(self, input_lines, input_offset,
                                    input_source=document['source'])
        assert result == [], 'RSTStateMachine.run() results should be empty!'
        # Drop references that are no longer needed after the parse:
        self.node = self.memo = None
186
187
class NestedStateMachine(RSTStateMachine):
    """
    StateMachine run from within other StateMachine runs, to parse nested
    document structures.
    """

    def __init__(self, state_classes, initial_state,
                 debug=False, parent_state_machine=None) -> None:
        # The state machine that spawned this one (None at the top level).
        self.parent_state_machine = parent_state_machine
        super().__init__(state_classes, initial_state, debug)

    def run(self, input_lines, input_offset, memo, node, match_titles=True):
        """
        Parse `input_lines` and populate `node`.

        Extend `StateMachineWS.run()`: set up document-wide data.
        """
        document = memo.document
        self.memo = memo
        self.document = document
        self.language = memo.language
        self.reporter = document.reporter
        self.node = node
        self.match_titles = match_titles
        self.attach_observer(document.note_source)
        result = StateMachineWS.run(self, input_lines, input_offset)
        assert result == [], ('NestedStateMachine.run() results should be '
                              'empty!')
        return result
219
220
class RSTState(StateWS):

    """
    reStructuredText State superclass.

    Contains methods used by all State subclasses.
    """

    nested_sm = NestedStateMachine
    nested_sm_cache = []  # shared pool of reusable nested state machines

    def __init__(self, state_machine: RSTStateMachine, debug=False) -> None:
        self.nested_sm_kwargs = {'state_classes': state_classes,
                                 'initial_state': 'Body'}
        StateWS.__init__(self, state_machine, debug)

    def runtime_init(self) -> None:
        """Set up parse-run-specific attributes from the state machine."""
        StateWS.runtime_init(self)
        memo = self.state_machine.memo
        self.memo = memo
        self.document = memo.document
        self.inliner = memo.inliner
        self.reporter = self.document.reporter
        # enable the reporter to determine source and source-line
        if not hasattr(self.reporter, 'get_source_and_line'):
            self.reporter.get_source_and_line = self.state_machine.get_source_and_line  # noqa:E501

    @property
    def parent(self) -> nodes.Element | None:
        # The "current node": generated nodes are appended to it.
        return self.state_machine.node

    @parent.setter
    def parent(self, value: nodes.Element):
        self.state_machine.node = value

    def goto_line(self, abs_line_offset) -> None:
        """
        Jump to input line `abs_line_offset`, ignoring jumps past the end.
        """
        try:
            self.state_machine.goto_line(abs_line_offset)
        except EOFError:
            pass

    def no_match(self, context, transitions):
        """
        Override `StateWS.no_match` to generate a system message.

        This code should never be run.
        """
        self.reporter.severe(
            'Internal error: no transition pattern match. State: "%s"; '
            'transitions: %s; context: %s; current line: %r.'
            % (self.__class__.__name__, transitions, context,
               self.state_machine.line))
        return context, None, []

    def bof(self, context):
        """Called at beginning of file."""
        return [], []

    def nested_parse(self,
                     block: StringList,
                     input_offset: int,
                     node: nodes.Element|None = None,
                     match_titles: bool = False,
                     state_machine_class: StateMachineWS|None = None,
                     state_machine_kwargs: dict|None = None
                     ) -> int:
        """
        Parse the input `block` with a nested state-machine rooted at `node`.

        :block:
            reStructuredText source extract.
        :input_offset:
            Line number at start of the block.
        :node:
            Base node. Generated nodes will be appended to this node.
            Default: the "current node" (`self.state_machine.node`).
        :match_titles:
            Allow section titles?
            Caution: With a custom base node, this may lead to an invalid
            or mixed up document tree. [#]_
        :state_machine_class:
            Default: `NestedStateMachine`.
        :state_machine_kwargs:
            Keyword arguments for the state-machine instantiation.
            Default: `self.nested_sm_kwargs`.

        Create a new state-machine instance if required.
        Return new offset.

        .. [#] See also ``test_parsers/test_rst/test_nested_parsing.py``
           and Sphinx's `nested_parse_to_nodes()`__.

           __ https://www.sphinx-doc.org/en/master/extdev/utils.html
              #sphinx.util.parsing.nested_parse_to_nodes
        """
        if node is None:
            node = self.state_machine.node
        # A cached state machine may only be reused if both the class
        # and the keyword arguments are the defaults:
        use_default = 0
        if state_machine_class is None:
            state_machine_class = self.nested_sm
            use_default += 1
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
            use_default += 1
        my_state_machine = None
        if use_default == 2:
            try:
                # get cached state machine, prevent others from using it
                my_state_machine = self.nested_sm_cache.pop()
            except IndexError:
                pass
        if not my_state_machine:
            my_state_machine = state_machine_class(
                debug=self.debug,
                parent_state_machine=self.state_machine,
                **state_machine_kwargs)
        # Check if we may use sections (with a caveat for custom nodes
        # that may be dummies to collect children):
        if (node == self.state_machine.node
            and not isinstance(node, (nodes.document, nodes.section))):
            match_titles = False  # avoid invalid sections
        if match_titles:
            # Compensate mismatch of known title styles and number of
            # parent sections of the base node if the document wide
            # title styles are used with a detached base node or
            # a new list of title styles with the current parent node:
            l_node = len(node.section_hierarchy())
            l_start = min(len(self.parent.section_hierarchy()),
                          len(self.memo.title_styles))
            my_state_machine.section_level_offset = l_start - l_node

        # run the state machine and populate `node`:
        block_length = len(block)
        my_state_machine.run(block, input_offset, self.memo,
                             node, match_titles)

        if match_titles:
            if node == self.state_machine.node:
                # Pass on the new "current node" to parent state machines:
                sm = self.state_machine
                try:
                    while True:
                        sm.node = my_state_machine.node
                        sm = sm.parent_state_machine
                except AttributeError:
                    # reached the top: no parent_state_machine attribute
                    pass
        # clean up
        new_offset = my_state_machine.abs_line_offset()
        if use_default == 2:
            self.nested_sm_cache.append(my_state_machine)
        else:
            my_state_machine.unlink()
        # No `block.parent` implies disconnected -- lines aren't in sync:
        if block.parent and (len(block) - block_length) != 0:
            # Adjustment for block if modified in nested parse:
            self.state_machine.next_line(len(block) - block_length)
        return new_offset

    def nested_list_parse(self, block, input_offset, node, initial_state,
                          blank_finish,
                          blank_finish_state=None,
                          extra_settings=None,
                          match_titles=False,  # deprecated, will be removed
                          state_machine_class=None,
                          state_machine_kwargs=None):
        """
        Parse the input `block` with a nested state-machine rooted at `node`.

        Create a new StateMachine rooted at `node` and run it over the
        input `block` (see also `nested_parse()`).
        Also keep track of optional intermediate blank lines and the
        required final one.

        Return new offset and a boolean indicating whether there was a
        blank final line.
        """
        # `None` sentinel instead of a mutable default argument value
        # (a literal ``{}`` default would be shared between all calls):
        if extra_settings is None:
            extra_settings = {}
        if match_titles:
            warnings.warn('The "match_titles" argument of '
                          'parsers.rst.states.RSTState.nested_list_parse() '
                          'will be ignored in Docutils 1.0 '
                          'and removed in Docutils 2.0.',
                          PendingDeprecationWarning, stacklevel=2)
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs.copy()
        state_machine_kwargs['initial_state'] = initial_state
        my_state_machine = state_machine_class(
            debug=self.debug,
            parent_state_machine=self.state_machine,
            **state_machine_kwargs)
        if blank_finish_state is None:
            blank_finish_state = initial_state
        my_state_machine.states[blank_finish_state].blank_finish = blank_finish
        for key, value in extra_settings.items():
            setattr(my_state_machine.states[initial_state], key, value)
        my_state_machine.run(block, input_offset, memo=self.memo,
                             node=node, match_titles=match_titles)
        blank_finish = my_state_machine.states[blank_finish_state].blank_finish
        my_state_machine.unlink()
        return my_state_machine.abs_line_offset(), blank_finish

    def section(self, title, source, style, lineno, messages) -> None:
        """Check for a valid subsection and create one if it checks out."""
        if self.check_subsection(source, style, lineno):
            self.new_subsection(title, lineno, messages)

    def check_subsection(self, source, style, lineno) -> bool:
        """
        Check for a valid subsection header. Update section data in `memo`.

        When a new section is reached that isn't a subsection of the current
        section, set `self.parent` to the new section's parent section
        (or the root node if the new section is a top-level section).
        """
        title_styles = self.memo.title_styles
        parent_sections = self.parent.section_hierarchy()
        # current section level: (0 root, 1 section, 2 subsection, ...)
        oldlevel = (len(parent_sections)
                    + self.state_machine.section_level_offset)
        # new section level:
        try:  # check for existing title style
            newlevel = title_styles.index(style) + 1
        except ValueError:  # new title style
            newlevel = len(title_styles) + 1
        # The new level must not be deeper than an immediate child
        # of the current level:
        if newlevel > oldlevel + 1:
            styles = ' '.join('/'.join(style) for style in title_styles)
            self.parent += self.reporter.error(
                'Inconsistent title style:'
                f' skip from level {oldlevel} to {newlevel}.',
                nodes.literal_block('', source),
                nodes.paragraph('', f'Established title styles: {styles}'),
                line=lineno)
            return False
        if newlevel <= oldlevel:
            # new section is sibling or higher up in the section hierarchy
            try:
                new_parent = parent_sections[newlevel-oldlevel-1].parent
            except IndexError:
                styles = ' '.join('/'.join(style) for style in title_styles)
                details = (f'The parent of level {newlevel} sections cannot'
                           ' be reached. The parser is at section level'
                           f' {oldlevel} but the current node has only'
                           f' {len(parent_sections)} parent section(s).'
                           '\nOne reason may be a high level'
                           ' section used in a directive that parses its'
                           ' content into a base node not attached to'
                           ' the document\n(up to Docutils 0.21,'
                           ' these sections were silently dropped).')
                self.parent += self.reporter.error(
                    f'A level {newlevel} section cannot be used here.',
                    nodes.literal_block('', source),
                    nodes.paragraph('', f'Established title styles: {styles}'),
                    nodes.paragraph('', details),
                    line=lineno)
                return False
            self.parent = new_parent
        self.memo.section_level = newlevel - 1  # legacy, ignored
        if newlevel > len(title_styles):
            title_styles.append(style)
        return True

    def title_inconsistent(self, sourcetext, lineno):
        # Ignored. Will be removed in Docutils 2.0.
        error = self.reporter.error(
            'Title level inconsistent:', nodes.literal_block('', sourcetext),
            line=lineno)
        return error

    def new_subsection(self, title, lineno, messages):
        """Append new subsection to document tree."""
        section_node = nodes.section()
        self.parent += section_node
        textnodes, title_messages = self.inline_text(title, lineno)
        titlenode = nodes.title(title, '', *textnodes)
        name = normalize_name(titlenode.astext())
        section_node['names'].append(name)
        section_node += titlenode
        section_node += messages
        section_node += title_messages
        self.document.note_implicit_target(section_node, section_node)
        # Update state: generated nodes now go into the new section.
        self.parent = section_node
        self.memo.section_level += 1  # legacy, ignored

    def paragraph(self, lines, lineno):
        """
        Return a list (paragraph & messages) & a boolean: literal_block next?
        """
        data = '\n'.join(lines).rstrip()
        # An unescaped "::" at the end announces a literal block:
        if re.search(r'(?<!\\)(\\\\)*::$', data):
            if len(data) == 2:
                # Paragraph consists of "::" only -- no paragraph node.
                return [], 1
            elif data[-3] in ' \n':
                # Whitespace before "::" -- remove the marker entirely.
                text = data[:-3].rstrip()
            else:
                # "text::" -- keep a single colon.
                text = data[:-1]
            literalnext = 1
        else:
            text = data
            literalnext = 0
        textnodes, messages = self.inline_text(text, lineno)
        p = nodes.paragraph(data, '', *textnodes)
        p.source, p.line = self.state_machine.get_source_and_line(lineno)
        return [p] + messages, literalnext

    def inline_text(self, text, lineno):
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.
        """
        # Local name `textnodes` avoids shadowing the `nodes` module.
        textnodes, messages = self.inliner.parse(text, lineno,
                                                 self.memo, self.parent)
        return textnodes, messages

    def unindent_warning(self, node_name):
        # the actual problem is one line below the current line
        lineno = self.state_machine.abs_line_number() + 1
        return self.reporter.warning('%s ends without a blank line; '
                                     'unexpected unindent.' % node_name,
                                     line=lineno)
546
547
def build_regexp(definition, compile_patterns=True):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.
    """
    name, prefix, suffix, parts = definition
    # Nested tuples are expanded recursively (as pattern strings);
    # plain strings are used verbatim.
    alternatives = '|'.join(
        build_regexp(part, compile_patterns=False)
        if isinstance(part, tuple) else part
        for part in parts)
    pattern = f'{prefix}(?P<{name}>{alternatives}){suffix}'
    if compile_patterns:
        return re.compile(pattern)
    return pattern
569
570
571class Inliner:
572
573 """
574 Parse inline markup; call the `parse()` method.
575 """
576
577 def __init__(self) -> None:
578 self.implicit_dispatch = []
579 """List of (pattern, bound method) tuples, used by
580 `self.implicit_inline`."""
581
    def init_customizations(self, settings) -> None:
        """Compile the inline-markup recognition patterns.

        Honours ``settings.character_level_inline_markup`` (when set,
        inline markup is recognized anywhere, not just at word
        boundaries) and registers implicit-markup dispatch entries
        (standalone URIs always; PEP/RFC references depending on
        `settings`).
        """
        # lookahead and look-behind expressions for inline markup rules
        if getattr(settings, 'character_level_inline_markup', False):
            start_string_prefix = '(^|(?<!\x00))'
            end_string_suffix = ''
        else:
            start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
                                   (punctuation_chars.openers,
                                    punctuation_chars.delimiters))
            end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
                                 (punctuation_chars.closing_delimiters,
                                  punctuation_chars.delimiters,
                                  punctuation_chars.closers))
        # Collect the local pattern fragments and the class-level
        # constants for %-interpolation into the verbose regexps below:
        args = locals().copy()
        args.update(vars(self.__class__))

        parts = ('initial_inline', start_string_prefix, '',
                 [
                  ('start', '', self.non_whitespace_after,  # simple start-strings
                   [r'\*\*',                # strong
                    r'\*(?!\*)',            # emphasis but not strong
                    r'``',                  # literal
                    r'_`',                  # inline internal target
                    r'\|(?!\|)']            # substitution reference
                   ),
                  ('whole', '', end_string_suffix,  # whole constructs
                   [  # reference name & end-string
                    r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
                    ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
                     [r'[0-9]+',               # manually numbered
                      r'\#(%s)?' % self.simplename,  # auto-numbered (w/ label?)
                      r'\*',                   # auto-symbol
                      r'(?P<citationlabel>%s)' % self.simplename,  # citation ref
                      ]
                     )
                    ]
                   ),
                  ('backquote',  # interpreted text or phrase reference
                   '(?P<role>(:%s:)?)' % self.simplename,  # optional role
                   self.non_whitespace_after,
                   ['`(?!`)']            # but not literal
                   )
                  ]
                 )
        self.start_string_prefix = start_string_prefix
        self.end_string_suffix = end_string_suffix
        self.parts = parts

        # Compiled patterns; `initial` matches all start-strings and
        # whole constructs, the others match specific end-strings:
        self.patterns = Struct(
          initial=build_regexp(parts),
          emphasis=re.compile(self.non_whitespace_escape_before
                              + r'(\*)' + end_string_suffix),
          strong=re.compile(self.non_whitespace_escape_before
                            + r'(\*\*)' + end_string_suffix),
          interpreted_or_phrase_ref=re.compile(
              r"""
              %(non_unescaped_whitespace_escape_before)s
              (
                `
                (?P<suffix>
                  (?P<role>:%(simplename)s:)?
                  (?P<refend>__?)?
                )
              )
              %(end_string_suffix)s
              """ % args, re.VERBOSE),
          embedded_link=re.compile(
              r"""
              (
                (?:[ \n]+|^)            # spaces or beginning of line/string
                <                       # open bracket
                %(non_whitespace_after)s
                (([^<>]|\x00[<>])+)     # anything but unescaped angle brackets
                %(non_whitespace_escape_before)s
                >                       # close bracket
              )
              $                         # end of string
              """ % args, re.VERBOSE),
          literal=re.compile(self.non_whitespace_before + '(``)'
                             + end_string_suffix),
          target=re.compile(self.non_whitespace_escape_before
                            + r'(`)' + end_string_suffix),
          substitution_ref=re.compile(self.non_whitespace_escape_before
                                      + r'(\|_{0,2})'
                                      + end_string_suffix),
          email=re.compile(self.email_pattern % args + '$',
                           re.VERBOSE),
          uri=re.compile(
                (r"""
                %(start_string_prefix)s
                (?P<whole>
                  (?P<absolute>           # absolute URI
                    (?P<scheme>             # scheme (http, ftp, mailto)
                      [a-zA-Z][a-zA-Z0-9.+-]*
                    )
                    :
                    (
                      (                       # either:
                        (//?)?                  # hierarchical URI
                        %(uric)s*               # URI characters
                        %(uri_end)s             # final URI char
                      )
                      (                       # optional query
                        \?%(uric)s*
                        %(uri_end)s
                      )?
                      (                       # optional fragment
                        \#%(uric)s*
                        %(uri_end)s
                      )?
                    )
                  )
                |                       # *OR*
                  (?P<email>              # email address
                   """ + self.email_pattern + r"""
                  )
                )
                %(end_string_suffix)s
                """) % args, re.VERBOSE),
          pep=re.compile(
                r"""
                %(start_string_prefix)s
                (
                  (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
                |
                  (PEP\s+(?P<pepnum2>\d+))      # reference by name
                )
                %(end_string_suffix)s""" % args, re.VERBOSE),
          rfc=re.compile(
                r"""
                %(start_string_prefix)s
                (RFC(-|\s+)?(?P<rfcnum>\d+))
                %(end_string_suffix)s""" % args, re.VERBOSE))

        # Implicit markup (found after explicit markup is processed):
        self.implicit_dispatch.append((self.patterns.uri,
                                       self.standalone_uri))
        if settings.pep_references:
            self.implicit_dispatch.append((self.patterns.pep,
                                           self.pep_reference))
        if settings.rfc_references:
            self.implicit_dispatch.append((self.patterns.rfc,
                                           self.rfc_reference))
724
    def parse(self, text, lineno, memo, parent):
        # Needs to be refactored for nested inline markup.
        # Add nested_parse() method?
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.

        Using `self.patterns.initial`, a pattern which matches start-strings
        (emphasis, strong, interpreted, phrase reference, literal,
        substitution reference, and inline target) and complete constructs
        (simple reference, footnote reference), search for a candidate. When
        one is found, check for validity (e.g., not a quoted '*' character).
        If valid, search for the corresponding end string if applicable, and
        check it for validity. If not found or invalid, generate a warning
        and ignore the start-string. Implicit inline markup (e.g. standalone
        URIs) is found last.

        :text: source string
        :lineno: absolute line number, cf. `statemachine.get_source_and_line()`
        """
        self.document = memo.document
        self.language = memo.language
        self.reporter = self.document.reporter
        self.parent = parent
        pattern_search = self.patterns.initial.search
        dispatch = self.dispatch
        # Backslash-escapes are converted to null-escapes up front and
        # removed/restored by the construct handlers.
        remaining = escape2null(text)
        processed = []
        unprocessed = []
        messages = []
        while remaining:
            match = pattern_search(remaining)
            if match:
                groups = match.groupdict()
                # The matched group name selects the handler method:
                method = dispatch[groups['start'] or groups['backquote']
                                  or groups['refend'] or groups['fnend']]
                before, inlines, remaining, sysmessages = method(self, match,
                                                                 lineno)
                unprocessed.append(before)
                messages += sysmessages
                if inlines:
                    # Check text before the construct for implicit markup
                    # (e.g. standalone URIs):
                    processed += self.implicit_inline(''.join(unprocessed),
                                                      lineno)
                    processed += inlines
                    unprocessed = []
            else:
                break
        # Leftover text (including text from rejected candidates):
        remaining = ''.join(unprocessed) + remaining
        if remaining:
            processed += self.implicit_inline(remaining, lineno)
        return processed, messages
775
    # Inline object recognition
    # -------------------------
    # See also init_customizations().
    # Class-level regexp fragments, shared by all instances and
    # %-interpolated into the compound patterns compiled there.
    non_whitespace_before = r'(?<!\s)'
    non_whitespace_escape_before = r'(?<![\s\x00])'
    non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
    non_whitespace_after = r'(?!\s)'
    # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
    simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
    # Valid URI characters (see RFC 2396 & RFC 2732);
    # final \x00 allows backslash escapes in URIs:
    uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
    # Delimiter indicating the end of a URI (not part of the URI):
    uri_end_delim = r"""[>]"""
    # Last URI character; same as uric but no punctuation:
    urilast = r"""[_~*/=+a-zA-Z0-9]"""
    # End of a URI (either 'urilast' or 'uric followed by a
    # uri_end_delim'):
    uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
    # Valid e-mail address characters (\x00 allows backslash escapes):
    emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
    email_pattern = r"""
          %(emailc)s+(?:\.%(emailc)s+)*   # name
          (?<!\x00)@                      # at
          %(emailc)s+(?:\.%(emailc)s*)*   # host
          %(uri_end)s                     # final URI char
          """
802
803 def quoted_start(self, match):
804 """Test if inline markup start-string is 'quoted'.
805
806 'Quoted' in this context means the start-string is enclosed in a pair
807 of matching opening/closing delimiters (not necessarily quotes)
808 or at the end of the match.
809 """
810 string = match.string
811 start = match.start()
812 if start == 0: # start-string at beginning of text
813 return False
814 prestart = string[start - 1]
815 try:
816 poststart = string[match.end()]
817 except IndexError: # start-string at end of text
818 return True # not "quoted" but no markup start-string either
819 return punctuation_chars.match_chars(prestart, poststart)
820
    def inline_obj(self, match, lineno, end_pattern, nodeclass,
                   restore_backslashes=False):
        """Parse a simple delimited inline construct (e.g. emphasis, strong).

        Return a 5-tuple: text before the construct, list of generated
        nodes, rest of the string, list of system messages, and the
        end-string (empty if the start-string was quoted or unmatched).
        """
        string = match.string
        matchstart = match.start('start')
        matchend = match.end('start')
        if self.quoted_start(match):
            # Quoted start-string: not markup; pass the text through.
            return string[:matchend], [], string[matchend:], [], ''
        endmatch = end_pattern.search(string[matchend:])
        if endmatch and endmatch.start(1):  # 1 or more chars
            text = endmatch.string[:endmatch.start(1)]
            if restore_backslashes:
                text = unescape(text, True)
            textend = matchend + endmatch.end(1)
            rawsource = unescape(string[matchstart:textend], True)
            node = nodeclass(rawsource, text)
            return (string[:matchstart], [node],
                    string[textend:], [], endmatch.group(1))
        # No end-string found: warn and mark the start-string problematic.
        msg = self.reporter.warning(
            'Inline %s start-string without end-string.'
            % nodeclass.__name__, line=lineno)
        text = unescape(string[matchstart:matchend], True)
        prb = self.problematic(text, text, msg)
        return string[:matchstart], [prb], string[matchend:], [msg], ''
844
845 def problematic(self, text, rawsource, message):
846 msgid = self.document.set_id(message, self.parent)
847 problematic = nodes.problematic(rawsource, text, refid=msgid)
848 prbid = self.document.set_id(problematic)
849 message.add_backref(prbid)
850 return problematic
851
852 def emphasis(self, match, lineno):
853 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
854 match, lineno, self.patterns.emphasis, nodes.emphasis)
855 return before, inlines, remaining, sysmessages
856
857 def strong(self, match, lineno):
858 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
859 match, lineno, self.patterns.strong, nodes.strong)
860 return before, inlines, remaining, sysmessages
861
    def interpreted_or_phrase_ref(self, match, lineno):
        """Parse backquoted text: interpreted text or phrase reference.

        The kind of construct is only known once the end-string is
        found: a trailing underscore makes it a phrase reference, a
        role (prefix or suffix) makes it interpreted text.
        Return a 4-tuple: text before the construct, list of nodes,
        rest of the string, and list of system messages.
        """
        end_pattern = self.patterns.interpreted_or_phrase_ref
        string = match.string
        matchstart = match.start('backquote')
        matchend = match.end('backquote')
        rolestart = match.start('role')
        role = match.group('role')
        position = ''
        if role:
            role = role[1:-1]  # strip the enclosing colons
            position = 'prefix'
        elif self.quoted_start(match):
            return string[:matchend], [], string[matchend:], []
        endmatch = end_pattern.search(string[matchend:])
        if endmatch and endmatch.start(1):  # 1 or more chars
            textend = matchend + endmatch.end()
            if endmatch.group('role'):
                if role:
                    # Role given both before and after the construct:
                    msg = self.reporter.warning(
                        'Multiple roles in interpreted text (both '
                        'prefix and suffix present; only one allowed).',
                        line=lineno)
                    text = unescape(string[rolestart:textend], True)
                    prb = self.problematic(text, text, msg)
                    return string[:rolestart], [prb], string[textend:], [msg]
                role = endmatch.group('suffix')[1:-1]
                position = 'suffix'
            escaped = endmatch.string[:endmatch.start(1)]
            rawsource = unescape(string[matchstart:textend], True)
            if rawsource[-1:] == '_':
                # Trailing underscore: phrase reference (roles invalid).
                if role:
                    msg = self.reporter.warning(
                        'Mismatch: both interpreted text role %s and '
                        'reference suffix.' % position, line=lineno)
                    text = unescape(string[rolestart:textend], True)
                    prb = self.problematic(text, text, msg)
                    return string[:rolestart], [prb], string[textend:], [msg]
                return self.phrase_ref(string[:matchstart], string[textend:],
                                       rawsource, escaped)
            else:
                rawsource = unescape(string[rolestart:textend], True)
                nodelist, messages = self.interpreted(rawsource, escaped, role,
                                                      lineno)
                return (string[:rolestart], nodelist,
                        string[textend:], messages)
        # No end-string found: warn and mark the start-string problematic.
        msg = self.reporter.warning(
            'Inline interpreted text or phrase reference start-string '
            'without end-string.', line=lineno)
        text = unescape(string[matchstart:matchend], True)
        prb = self.problematic(text, text, msg)
        return string[:matchstart], [prb], string[matchend:], [msg]
913
    def phrase_ref(self, before, after, rawsource, escaped, text=None):
        """Build reference (and possibly target) nodes for a phrase reference.

        `escaped` may contain an embedded URI or alias
        (```phrase <target>`_``); if so, a `target` node is created in
        addition to the `reference`.  Return a 4-tuple of
        (before-text, node list, after-text, system messages).
        """
        # `text` is ignored (since 0.16)
        match = self.patterns.embedded_link.search(escaped)
        if match:  # embedded <URI> or <alias_>
            text = escaped[:match.start(0)]
            unescaped = unescape(text)
            rawtext = unescape(text, True)
            aliastext = match.group(2)
            rawaliastext = unescape(aliastext, True)
            underscore_escaped = rawaliastext.endswith(r'\_')
            if (aliastext.endswith('_')
                and not (underscore_escaped
                         or self.patterns.uri.match(aliastext))):
                # Trailing "_" and not a URI: embedded alias (indirect target).
                aliastype = 'name'
                alias = normalize_name(unescape(aliastext[:-1]))
                target = nodes.target(match.group(1), refname=alias)
                target.indirect_reference_name = whitespace_normalize_name(
                    unescape(aliastext[:-1]))
            else:
                aliastype = 'uri'
                # remove unescaped whitespace
                alias_parts = split_escaped_whitespace(match.group(2))
                alias = ' '.join(''.join(part.split())
                                 for part in alias_parts)
                alias = self.adjust_uri(unescape(alias))
                if alias.endswith(r'\_'):
                    alias = alias[:-2] + '_'
                target = nodes.target(match.group(1), refuri=alias)
                target.referenced = 1
            if not aliastext:
                raise ApplicationError('problem with embedded link: %r'
                                       % aliastext)
            if not text:
                # The phrase part was empty: use the alias as reference text.
                text = alias
                unescaped = unescape(text)
                rawtext = rawaliastext
        else:
            text = escaped
            unescaped = unescape(text)
            target = None
            rawtext = unescape(escaped, True)

        refname = normalize_name(unescaped)
        reference = nodes.reference(rawsource, text,
                                    name=whitespace_normalize_name(unescaped))
        reference[0].rawsource = rawtext

        node_list = [reference]

        if rawsource[-2:] == '__':  # anonymous reference
            if target and (aliastype == 'name'):
                reference['refname'] = alias
                self.document.note_refname(reference)
                # self.document.note_indirect_target(target) # required?
            elif target and (aliastype == 'uri'):
                reference['refuri'] = alias
            else:
                reference['anonymous'] = True
        else:
            if target:
                target['names'].append(refname)
                if aliastype == 'name':
                    reference['refname'] = alias
                    self.document.note_indirect_target(target)
                    self.document.note_refname(reference)
                else:
                    reference['refuri'] = alias
                    # target.note_referenced_by(name=refname)
                    self.document.note_implicit_target(target, self.parent)
                node_list.append(target)
            else:
                reference['refname'] = refname
                self.document.note_refname(reference)
        return before, node_list, after, []
988
989 def adjust_uri(self, uri):
990 match = self.patterns.email.match(uri)
991 if match:
992 return 'mailto:' + uri
993 else:
994 return uri
995
996 def interpreted(self, rawsource, text, role, lineno):
997 role_fn, messages = roles.role(role, self.language, lineno,
998 self.reporter)
999 if role_fn:
1000 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
1001 return nodes, messages + messages2
1002 else:
1003 msg = self.reporter.error(
1004 'Unknown interpreted text role "%s".' % role,
1005 line=lineno)
1006 return ([self.problematic(rawsource, rawsource, msg)],
1007 messages + [msg])
1008
1009 def literal(self, match, lineno):
1010 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
1011 match, lineno, self.patterns.literal, nodes.literal,
1012 restore_backslashes=True)
1013 return before, inlines, remaining, sysmessages
1014
1015 def inline_internal_target(self, match, lineno):
1016 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
1017 match, lineno, self.patterns.target, nodes.target)
1018 if inlines and isinstance(inlines[0], nodes.target):
1019 assert len(inlines) == 1
1020 target = inlines[0]
1021 name = normalize_name(target.astext())
1022 target['names'].append(name)
1023 self.document.note_explicit_target(target, self.parent)
1024 return before, inlines, remaining, sysmessages
1025
    def substitution_reference(self, match, lineno):
        """Parse a substitution reference (``|name|``, ``|name|_``,
        ``|name|__``).

        A trailing "_" or "__" additionally wraps the
        substitution_reference in a named or anonymous reference node.
        """
        before, inlines, remaining, sysmessages, endstring = self.inline_obj(
            match, lineno, self.patterns.substitution_ref,
            nodes.substitution_reference)
        if len(inlines) == 1:
            subref_node = inlines[0]
            if isinstance(subref_node, nodes.substitution_reference):
                subref_text = subref_node.astext()
                self.document.note_substitution_ref(subref_node, subref_text)
                if endstring[-1:] == '_':  # also a reference
                    reference_node = nodes.reference(
                        '|%s%s' % (subref_text, endstring), '')
                    if endstring[-2:] == '__':
                        reference_node['anonymous'] = True
                    else:
                        reference_node['refname'] = normalize_name(subref_text)
                        self.document.note_refname(reference_node)
                    reference_node += subref_node
                    inlines = [reference_node]
        return before, inlines, remaining, sysmessages
1046
    def footnote_reference(self, match, lineno):
        """
        Handles `nodes.footnote_reference` and `nodes.citation_reference`
        elements.

        Return (before-text, [reference node], remaining text, messages).
        """
        label = match.group('footnotelabel')
        refname = normalize_name(label)
        string = match.string
        before = string[:match.start('whole')]
        remaining = string[match.end('whole'):]
        if match.group('citationlabel'):
            # Citation-style label: "[label]_".
            refnode = nodes.citation_reference('[%s]_' % label,
                                               refname=refname)
            refnode += nodes.Text(label)
            self.document.note_citation_ref(refnode)
        else:
            refnode = nodes.footnote_reference('[%s]_' % label)
            if refname[0] == '#':  # auto-numbered: "[#]_" or "[#name]_"
                refname = refname[1:]
                refnode['auto'] = 1
                self.document.note_autofootnote_ref(refnode)
            elif refname == '*':  # auto-symbol: "[*]_"
                refname = ''
                refnode['auto'] = '*'
                self.document.note_symbol_footnote_ref(
                    refnode)
            else:  # manually numbered or named
                refnode += nodes.Text(label)
            if refname:
                refnode['refname'] = refname
                self.document.note_footnote_ref(refnode)
        if utils.get_trim_footnote_ref_space(self.document.settings):
            before = before.rstrip()
        return before, [refnode], remaining, []
1081
1082 def reference(self, match, lineno, anonymous=False):
1083 referencename = match.group('refname')
1084 refname = normalize_name(referencename)
1085 referencenode = nodes.reference(
1086 referencename + match.group('refend'), referencename,
1087 name=whitespace_normalize_name(referencename))
1088 referencenode[0].rawsource = referencename
1089 if anonymous:
1090 referencenode['anonymous'] = True
1091 else:
1092 referencenode['refname'] = refname
1093 self.document.note_refname(referencenode)
1094 string = match.string
1095 matchstart = match.start('whole')
1096 matchend = match.end('whole')
1097 return string[:matchstart], [referencenode], string[matchend:], []
1098
    def anonymous_reference(self, match, lineno):
        """Parse an anonymous reference (``name__``); delegates to
        `self.reference()`."""
        return self.reference(match, lineno, anonymous=True)
1101
1102 def standalone_uri(self, match, lineno):
1103 if (not match.group('scheme')
1104 or match.group('scheme').lower() in urischemes.schemes):
1105 if match.group('email'):
1106 addscheme = 'mailto:'
1107 else:
1108 addscheme = ''
1109 text = match.group('whole')
1110 refuri = addscheme + unescape(text)
1111 reference = nodes.reference(unescape(text, True), text,
1112 refuri=refuri)
1113 return [reference]
1114 else: # not a valid scheme
1115 raise MarkupMismatch
1116
    def pep_reference(self, match, lineno):
        """Turn a "PEP 287" / "pep-0287" mention into a reference node.

        :Exception: `MarkupMismatch` if the text matches neither form.
        """
        text = match.group(0)
        if text.startswith('pep-'):
            pepnum = int(unescape(match.group('pepnum1')))
        elif text.startswith('PEP'):
            pepnum = int(unescape(match.group('pepnum2')))
        else:
            raise MarkupMismatch
        # URI built from the configured base URL and file-name template.
        ref = (self.document.settings.pep_base_url
               + self.document.settings.pep_file_url_template % pepnum)
        return [nodes.reference(unescape(text, True), text, refuri=ref)]
1128
    # File-name template appended to settings.rfc_base_url:
    rfc_url = 'rfc%d.html'

    def rfc_reference(self, match, lineno):
        """Turn an "RFC 2822"-style mention into a reference node.

        :Exception: `MarkupMismatch` if the text does not start with "RFC".
        """
        text = match.group(0)
        if text.startswith('RFC'):
            rfcnum = int(unescape(match.group('rfcnum')))
            ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
        else:
            raise MarkupMismatch
        return [nodes.reference(unescape(text, True), text, refuri=ref)]
1139
    def implicit_inline(self, text, lineno):
        """
        Check each of the patterns in `self.implicit_dispatch` for a match,
        and dispatch to the stored method for the pattern.  Recursively check
        the text before and after the match.  Return a list of `nodes.Text`
        and inline element nodes.
        """
        if not text:
            return []
        for pattern, method in self.implicit_dispatch:
            match = pattern.search(text)
            if match:
                try:
                    # Must recurse on strings before *and* after the match;
                    # there may be multiple patterns.
                    return (self.implicit_inline(text[:match.start()], lineno)
                            + method(match, lineno)
                            + self.implicit_inline(text[match.end():], lineno))
                except MarkupMismatch:
                    # Method rejected the candidate; try further patterns.
                    pass
        # No implicit markup found: plain text node.
        return [nodes.Text(text)]
1161
    # Map inline markup start-strings to the parsing methods defined above.
    dispatch = {'*': emphasis,
                '**': strong,
                '`': interpreted_or_phrase_ref,
                '``': literal,
                '_`': inline_internal_target,
                ']_': footnote_reference,
                '|': substitution_reference,
                '_': reference,
                '__': anonymous_reference}
1171
1172
1173def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1174 return ord(s) - _zero
1175
1176
1177def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1178 return ord(s) - _zero
1179
1180
class Body(RSTState):

    """
    Generic classifier of the first line of a block.

    The `patterns` and `initial_transitions` class attributes below
    dispatch the first line of each block to the matching transition
    method (`bullet`, `enumerator`, `field_marker`, ...).
    """

    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    # Class-level namespace holding enumerated-list parsing data.
    enum = Struct()
    """Enumerated list parsing information."""

    enum.formatinfo = {
        'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
        'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
        'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman']  # ORDERED!
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+'}
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': RomanNumeral.from_string,
                       'upperroman': RomanNumeral.from_string}

    # Anchored regexp per sequence, for full-enumerator-text matching.
    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        enum.sequenceregexps[sequence] = re.compile(
            enum.sequencepats[sequence] + '$')

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table)."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    pats = {}
    """Fragments of patterns used by transitions."""

    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern? Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    # One named group per enumerator format, e.g. "(1)", "1)", "1.".
    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
            format, re.escape(enum.formatinfo[format].prefix),
            pats['enum'], re.escape(enum.formatinfo[format].suffix))

    patterns = {
        'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
        'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
        'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
        'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
        'doctest': r'>>>( +|$)',
        'line_block': r'\|( +|$)',
        'grid_table_top': grid_table_top_pat,
        'simple_table_top': simple_table_top_pat,
        'explicit_markup': r'\.\.( +|$)',
        'anonymous': r'__( +|$)',
        'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
        'text': r''}
    # Transition names, tried in order; 'text' is the catch-all.
    initial_transitions = (
        'bullet',
        'enumerator',
        'field_marker',
        'option_marker',
        'doctest',
        'line_block',
        'grid_table_top',
        'simple_table_top',
        'explicit_markup',
        'anonymous',
        'line',
        'text')
1272
1273 def indent(self, match, context, next_state):
1274 """Block quote."""
1275 (indented, indent, line_offset, blank_finish
1276 ) = self.state_machine.get_indented()
1277 elements = self.block_quote(indented, line_offset)
1278 self.parent += elements
1279 if not blank_finish:
1280 self.parent += self.unindent_warning('Block quote')
1281 return context, next_state, []
1282
    def block_quote(self, indented, line_offset):
        """Parse `indented` lines into one or more block_quote elements.

        An attribution line ("-- author") ends a block quote; any
        remaining lines start a new one.  Return a list of elements
        (block quotes plus any system messages).
        """
        elements = []
        while indented:
            blockquote = nodes.block_quote(rawsource='\n'.join(indented))
            (blockquote.source, blockquote.line
             ) = self.state_machine.get_source_and_line(line_offset+1)
            (blockquote_lines,
             attribution_lines,
             attribution_offset,
             indented,
             new_line_offset) = self.split_attribution(indented, line_offset)
            self.nested_parse(blockquote_lines, line_offset, blockquote)
            elements.append(blockquote)
            if attribution_lines:
                attribution, messages = self.parse_attribution(
                    attribution_lines, line_offset+attribution_offset)
                blockquote += attribution
                elements += messages
            line_offset = new_line_offset
            # Skip blank lines before any following block quote.
            while indented and not indented[0]:
                indented = indented[1:]
                line_offset += 1
        return elements
1306
    # U+2014 is an em-dash:
    attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')

    def split_attribution(self, indented, line_offset):
        """
        Check for a block quote attribution and split it off:

        * First line after a blank line must begin with a dash ("--", "---",
          em-dash; matches `self.attribution_pattern`).
        * Every line after that must have consistent indentation.
        * Attributions must be preceded by block quote content.

        Return a tuple of: (block quote content lines, attribution lines,
        attribution offset, remaining indented lines, remaining lines offset).
        """
        blank = None
        nonblank_seen = False
        for i in range(len(indented)):
            line = indented[i].rstrip()
            if line:
                if nonblank_seen and blank == i - 1:  # last line blank
                    match = self.attribution_pattern.match(line)
                    if match:
                        attribution_end, indent = self.check_attribution(
                            indented, i)
                        if attribution_end:
                            a_lines = indented[i:attribution_end]
                            # Strip the dash and the attribution indent:
                            a_lines.trim_left(match.end(), end=1)
                            a_lines.trim_left(indent, start=1)
                            return (indented[:i], a_lines,
                                    i, indented[attribution_end:],
                                    line_offset + attribution_end)
                nonblank_seen = True
            else:
                blank = i
        else:
            # No attribution found: everything is block quote content.
            return indented, None, None, None, None
1344
1345 def check_attribution(self, indented, attribution_start):
1346 """
1347 Check attribution shape.
1348 Return the index past the end of the attribution, and the indent.
1349 """
1350 indent = None
1351 i = attribution_start + 1
1352 for i in range(attribution_start + 1, len(indented)):
1353 line = indented[i].rstrip()
1354 if not line:
1355 break
1356 if indent is None:
1357 indent = len(line) - len(line.lstrip())
1358 elif len(line) - len(line.lstrip()) != indent:
1359 return None, None # bad shape; not an attribution
1360 else:
1361 # return index of line after last attribution line:
1362 i += 1
1363 return i, (indent or 0)
1364
    def parse_attribution(self, indented, line_offset):
        """Parse attribution lines into an `attribution` node.

        Return (attribution node, system messages).
        """
        text = '\n'.join(indented).rstrip()
        lineno = 1 + line_offset  # line_offset is zero-based
        textnodes, messages = self.inline_text(text, lineno)
        node = nodes.attribution(text, '', *textnodes)
        node.source, node.line = self.state_machine.get_source_and_line(lineno)
        return node, messages
1372
    def bullet(self, match, context, next_state):
        """Bullet list item."""
        ul = nodes.bullet_list()
        ul.source, ul.line = self.state_machine.get_source_and_line()
        self.parent += ul
        ul['bullet'] = match.string[0]
        i, blank_finish = self.list_item(match.end())
        ul += i
        offset = self.state_machine.line_offset + 1  # next line
        # Subsequent items are parsed by the specialized BulletList state.
        new_line_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=ul, initial_state='BulletList',
            blank_finish=blank_finish)
        self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Bullet list')
        return [], next_state, []
1391
    def list_item(self, indent):
        """Parse the current list item.

        Return (list_item node, "blank finish" flag).
        """
        src, srcline = self.state_machine.get_source_and_line()
        if self.state_machine.line[indent:]:
            # Text on the same line as the marker: indent is known.
            indented, line_offset, blank_finish = (
                self.state_machine.get_known_indented(indent))
        else:
            # Marker alone on its line: use the first indented line.
            indented, indent, line_offset, blank_finish = (
                self.state_machine.get_first_known_indented(indent))
        listitem = nodes.list_item('\n'.join(indented))
        listitem.source, listitem.line = src, srcline
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=listitem)
        return listitem, blank_finish
1406
    def enumerator(self, match, context, next_state):
        """Enumerated List Item"""
        format, sequence, text, ordinal = self.parse_enumerator(match)
        if not self.is_enumerated_list_item(ordinal, sequence, format):
            # Not a valid list item after all; re-dispatch as plain text.
            raise statemachine.TransitionCorrection('text')
        enumlist = nodes.enumerated_list()
        (enumlist.source,
         enumlist.line) = self.state_machine.get_source_and_line()
        self.parent += enumlist
        if sequence == '#':  # auto-enumerator defaults to arabic
            enumlist['enumtype'] = 'arabic'
        else:
            enumlist['enumtype'] = sequence
        enumlist['prefix'] = self.enum.formatinfo[format].prefix
        enumlist['suffix'] = self.enum.formatinfo[format].suffix
        if ordinal != 1:
            enumlist['start'] = ordinal
            msg = self.reporter.info(
                'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
                % (text, ordinal), base_node=enumlist)
            self.parent += msg
        listitem, blank_finish = self.list_item(match.end())
        enumlist += listitem
        offset = self.state_machine.line_offset + 1  # next line
        # Subsequent items are parsed by the specialized EnumeratedList state.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=enumlist, initial_state='EnumeratedList',
            blank_finish=blank_finish,
            extra_settings={'lastordinal': ordinal,
                            'format': format,
                            'auto': sequence == '#'})
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Enumerated list')
        return [], next_state, []
1443
1444 def parse_enumerator(self, match, expected_sequence=None):
1445 """
1446 Analyze an enumerator and return the results.
1447
1448 :Return:
1449 - the enumerator format ('period', 'parens', or 'rparen'),
1450 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1451 - the text of the enumerator, stripped of formatting, and
1452 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1453 ``None`` is returned for invalid enumerator text).
1454
1455 The enumerator format has already been determined by the regular
1456 expression match. If `expected_sequence` is given, that sequence is
1457 tried first. If not, we check for Roman numeral 1. This way,
1458 single-character Roman numerals (which are also alphabetical) can be
1459 matched. If no sequence has been matched, all sequences are checked in
1460 order.
1461 """
1462 groupdict = match.groupdict()
1463 sequence = ''
1464 for format in self.enum.formats:
1465 if groupdict[format]: # was this the format matched?
1466 break # yes; keep `format`
1467 else: # shouldn't happen
1468 raise ParserError('enumerator format not matched')
1469 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1470 : self.enum.formatinfo[format].end]
1471 if text == '#':
1472 sequence = '#'
1473 elif expected_sequence:
1474 try:
1475 if self.enum.sequenceregexps[expected_sequence].match(text):
1476 sequence = expected_sequence
1477 except KeyError: # shouldn't happen
1478 raise ParserError('unknown enumerator sequence: %s'
1479 % sequence)
1480 elif text == 'i':
1481 sequence = 'lowerroman'
1482 elif text == 'I':
1483 sequence = 'upperroman'
1484 if not sequence:
1485 for sequence in self.enum.sequences:
1486 if self.enum.sequenceregexps[sequence].match(text):
1487 break
1488 else: # shouldn't happen
1489 raise ParserError('enumerator sequence not matched')
1490 if sequence == '#':
1491 ordinal = 1
1492 else:
1493 try:
1494 ordinal = int(self.enum.converters[sequence](text))
1495 except InvalidRomanNumeralError:
1496 ordinal = None
1497 return format, sequence, text, ordinal
1498
    def is_enumerated_list_item(self, ordinal, sequence, format):
        """
        Check validity based on the ordinal value and the second line.

        Return true if the ordinal is valid and the second line is blank,
        indented, or starts with the next enumerator or an auto-enumerator.
        """
        if ordinal is None:
            return None
        try:
            next_line = self.state_machine.next_line()
        except EOFError:  # end of input lines
            self.state_machine.previous_line()
            return 1
        else:
            # Restore the state machine position after peeking ahead.
            self.state_machine.previous_line()
        if not next_line[:1].strip():  # blank or indented
            return 1
        result = self.make_enumerator(ordinal + 1, sequence, format)
        if result:
            next_enumerator, auto_enumerator = result
            try:
                if next_line.startswith((next_enumerator, auto_enumerator)):
                    return 1
            except TypeError:
                pass
        return None
1526
1527 def make_enumerator(self, ordinal, sequence, format):
1528 """
1529 Construct and return the next enumerated list item marker, and an
1530 auto-enumerator ("#" instead of the regular enumerator).
1531
1532 Return ``None`` for invalid (out of range) ordinals.
1533 """
1534 if sequence == '#':
1535 enumerator = '#'
1536 elif sequence == 'arabic':
1537 enumerator = str(ordinal)
1538 else:
1539 if sequence.endswith('alpha'):
1540 if ordinal > 26:
1541 return None
1542 enumerator = chr(ordinal + ord('a') - 1)
1543 elif sequence.endswith('roman'):
1544 try:
1545 enumerator = RomanNumeral(ordinal).to_uppercase()
1546 except TypeError:
1547 return None
1548 else: # shouldn't happen
1549 raise ParserError('unknown enumerator sequence: "%s"'
1550 % sequence)
1551 if sequence.startswith('lower'):
1552 enumerator = enumerator.lower()
1553 elif sequence.startswith('upper'):
1554 enumerator = enumerator.upper()
1555 else: # shouldn't happen
1556 raise ParserError('unknown enumerator sequence: "%s"'
1557 % sequence)
1558 formatinfo = self.enum.formatinfo[format]
1559 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1560 + ' ')
1561 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1562 return next_enumerator, auto_enumerator
1563
    def field_marker(self, match, context, next_state):
        """Field list item."""
        field_list = nodes.field_list()
        self.parent += field_list
        field, blank_finish = self.field(match)
        field_list += field
        offset = self.state_machine.line_offset + 1  # next line
        # Subsequent fields are parsed by the specialized FieldList state.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=field_list, initial_state='FieldList',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Field list')
        return [], next_state, []
1580
    def field(self, match):
        """Parse one field from a field marker match.

        Return (field node, "blank finish" flag).
        """
        name = self.parse_field_marker(match)
        src, srcline = self.state_machine.get_source_and_line()
        lineno = self.state_machine.abs_line_number()
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end())
        field_node = nodes.field()
        field_node.source = src
        field_node.line = srcline
        name_nodes, name_messages = self.inline_text(name, lineno)
        field_node += nodes.field_name(name, '', *name_nodes)
        field_body = nodes.field_body('\n'.join(indented), *name_messages)
        field_node += field_body
        if indented:
            self.parse_field_body(indented, line_offset, field_body)
        return field_node, blank_finish
1597
1598 def parse_field_marker(self, match):
1599 """Extract & return field name from a field marker match."""
1600 field = match.group()[1:] # strip off leading ':'
1601 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1602 return field
1603
    def parse_field_body(self, indented, offset, node) -> None:
        """Parse the field body lines into children of `node`."""
        self.nested_parse(indented, input_offset=offset, node=node)
1606
    def option_marker(self, match, context, next_state):
        """Option list item."""
        optionlist = nodes.option_list()
        (optionlist.source, optionlist.line
         ) = self.state_machine.get_source_and_line()
        try:
            listitem, blank_finish = self.option_list_item(match)
        except MarkupError as error:
            # This shouldn't happen; pattern won't match.
            msg = self.reporter.error('Invalid option list marker: %s'
                                      % error)
            self.parent += msg
            # Fall back: treat the indented text as a block quote.
            (indented, indent, line_offset, blank_finish
             ) = self.state_machine.get_first_known_indented(match.end())
            elements = self.block_quote(indented, line_offset)
            self.parent += elements
            if not blank_finish:
                self.parent += self.unindent_warning('Option list')
            return [], next_state, []
        self.parent += optionlist
        optionlist += listitem
        offset = self.state_machine.line_offset + 1  # next line
        # Subsequent items are parsed by the specialized OptionList state.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=optionlist, initial_state='OptionList',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
1638
    def option_list_item(self, match):
        """Parse one option list item.

        Return (option_list_item node, "blank finish" flag).

        :Exception: `MarkupError` (raised by `parse_option_marker`).
        """
        offset = self.state_machine.abs_line_offset()
        options = self.parse_option_marker(match)
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end())
        if not indented:  # not an option list item
            self.goto_line(offset)
            raise statemachine.TransitionCorrection('text')
        option_group = nodes.option_group('', *options)
        description = nodes.description('\n'.join(indented))
        option_list_item = nodes.option_list_item('', option_group,
                                                  description)
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=description)
        return option_list_item, blank_finish
1655
    def parse_option_marker(self, match):
        """
        Return a list of `node.option` and `node.option_argument` objects,
        parsed from an option marker match.

        :Exception: `MarkupError` for invalid option markers.
        """
        optlist = []
        # split at ", ", except inside < > (complex arguments)
        optionstrings = re.split(r', (?![^<]*>)', match.group().rstrip())
        for optionstring in optionstrings:
            tokens = optionstring.split()
            delimiter = ' '
            firstopt = tokens[0].split('=', 1)
            if len(firstopt) > 1:
                # "--opt=value" form
                tokens[:1] = firstopt
                delimiter = '='
            elif (len(tokens[0]) > 2
                  and ((tokens[0].startswith('-')
                        and not tokens[0].startswith('--'))
                       or tokens[0].startswith('+'))):
                # "-ovalue" form
                tokens[:1] = [tokens[0][:2], tokens[0][2:]]
                delimiter = ''
            if len(tokens) > 1 and (tokens[1].startswith('<')
                                    and tokens[-1].endswith('>')):
                # "-o <value1 value2>" form; join all values into one token
                tokens[1:] = [' '.join(tokens[1:])]
            if 0 < len(tokens) <= 2:
                # tokens is now [option] or [option, argument]
                option = nodes.option(optionstring)
                option += nodes.option_string(tokens[0], tokens[0])
                if len(tokens) > 1:
                    option += nodes.option_argument(tokens[1], tokens[1],
                                                    delimiter=delimiter)
                optlist.append(option)
            else:
                raise MarkupError(
                    'wrong number of option tokens (=%s), should be 1 or 2: '
                    '"%s"' % (len(tokens), optionstring))
        return optlist
1697
    def doctest(self, match, context, next_state):
        """Doctest block: collect the contiguous text block verbatim."""
        line = self.document.current_line
        data = '\n'.join(self.state_machine.get_text_block())
        # TODO: Parse with `directives.body.CodeBlock` with
        # argument 'pycon' (Python Console) in Docutils 1.0.
        n = nodes.doctest_block(data, data)
        n.line = line
        self.parent += n
        return [], next_state, []
1707
    def line_block(self, match, context, next_state):
        """First line of a line block."""
        block = nodes.line_block()
        self.parent += block
        lineno = self.state_machine.abs_line_number()
        (block.source,
         block.line) = self.state_machine.get_source_and_line(lineno)
        line, messages, blank_finish = self.line_block_line(match, lineno)
        block += line
        self.parent += messages
        if not blank_finish:
            offset = self.state_machine.line_offset + 1  # next line
            # Remaining lines are parsed by the specialized LineBlock state.
            new_line_offset, blank_finish = self.nested_list_parse(
                self.state_machine.input_lines[offset:],
                input_offset=self.state_machine.abs_line_offset() + 1,
                node=block, initial_state='LineBlock',
                blank_finish=False)
            self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.reporter.warning(
                'Line block ends without a blank line.',
                line=lineno+1)
        if len(block):
            if block[0].indent is None:
                block[0].indent = 0
            self.nest_line_block_lines(block)
        return [], next_state, []
1735
    def line_block_line(self, match, lineno):
        """Return one line element of a line_block."""
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         until_blank=True)
        text = '\n'.join(indented)
        text_nodes, messages = self.inline_text(text, lineno)
        line = nodes.line(text, '', *text_nodes)
        (line.source,
         line.line) = self.state_machine.get_source_and_line(lineno)
        if match.string.rstrip() != '|':  # not empty
            # Indent relative to the "|" marker (used later for nesting).
            line.indent = len(match.group(1)) - 1
        return line, messages, blank_finish
1749
1750 def nest_line_block_lines(self, block) -> None:
1751 for index in range(1, len(block)):
1752 if block[index].indent is None:
1753 block[index].indent = block[index - 1].indent
1754 self.nest_line_block_segment(block)
1755
    def nest_line_block_segment(self, block) -> None:
        """Recursively wrap runs of deeper-indented lines in subsidiary
        line_block nodes, modifying `block` in place."""
        indents = [item.indent for item in block]
        least = min(indents)
        new_items = []
        new_block = nodes.line_block()
        for item in block:
            if item.indent > least:
                # Deeper than the surrounding level: collect for nesting.
                new_block.append(item)
            else:
                # Back at the minimum indent: flush any pending sub-block.
                if len(new_block):
                    self.nest_line_block_segment(new_block)
                    new_items.append(new_block)
                    new_block = nodes.line_block()
                new_items.append(item)
        if len(new_block):
            # Flush a trailing sub-block.
            self.nest_line_block_segment(new_block)
            new_items.append(new_block)
        block[:] = new_items
1774
1775 def grid_table_top(self, match, context, next_state):
1776 """Top border of a full table."""
1777 return self.table_top(match, context, next_state,
1778 self.isolate_grid_table,
1779 tableparser.GridTableParser)
1780
1781 def simple_table_top(self, match, context, next_state):
1782 """Top border of a simple table."""
1783 return self.table_top(match, context, next_state,
1784 self.isolate_simple_table,
1785 tableparser.SimpleTableParser)
1786
1787 def table_top(self, match, context, next_state,
1788 isolate_function, parser_class):
1789 """Top border of a generic table."""
1790 nodelist, blank_finish = self.table(isolate_function, parser_class)
1791 self.parent += nodelist
1792 if not blank_finish:
1793 msg = self.reporter.warning(
1794 'Blank line required after table.',
1795 line=self.state_machine.abs_line_number()+1)
1796 self.parent += msg
1797 return [], next_state, []
1798
1799 def table(self, isolate_function, parser_class):
1800 """Parse a table."""
1801 block, messages, blank_finish = isolate_function()
1802 if block:
1803 try:
1804 parser = parser_class()
1805 tabledata = parser.parse(block)
1806 tableline = (self.state_machine.abs_line_number() - len(block)
1807 + 1)
1808 table = self.build_table(tabledata, tableline)
1809 nodelist = [table] + messages
1810 except tableparser.TableMarkupError as err:
1811 nodelist = self.malformed_table(block, ' '.join(err.args),
1812 offset=err.offset) + messages
1813 else:
1814 nodelist = messages
1815 return nodelist, blank_finish
1816
    def isolate_grid_table(self):
        """Extract the text block of a grid table from the input.

        Return (table lines, system messages, blank_finish); the table
        lines are an empty list on error.
        """
        messages = []
        blank_finish = True
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            block, src, srcline = err.args
            messages.append(self.reporter.error('Unexpected indentation.',
                                                source=src, line=srcline))
            blank_finish = False
        block.disconnect()
        # for East Asian chars:
        block.pad_double_width(self.double_width_pad_char)
        # Width of the top border; all table lines must match it.
        width = len(block[0].strip())
        for i in range(len(block)):
            block[i] = block[i].strip()
            if block[i][0] not in '+|':  # check left edge
                # Left edge broken: the table ended on the previous line;
                # back up so the remaining lines are re-examined.
                blank_finish = False
                self.state_machine.previous_line(len(block) - i)
                del block[i:]
                break
        if not self.grid_table_top_pat.match(block[-1]):  # find bottom
            # The last line is not a bottom border; search backwards
            # from second-last to third line of table:
            for i in range(len(block) - 2, 1, -1):
                if self.grid_table_top_pat.match(block[i]):
                    # Found a border line: truncate and back up past the rest.
                    self.state_machine.previous_line(len(block) - i + 1)
                    del block[i+1:]
                    blank_finish = False
                    break
            else:
                detail = 'Bottom border missing or corrupt.'
                messages.extend(self.malformed_table(block, detail, i))
                return [], messages, blank_finish
        for i in range(len(block)):  # check right edge
            if len(strip_combining_chars(block[i])
                   ) != width or block[i][-1] not in '+|':
                detail = 'Right border not aligned or missing.'
                messages.extend(self.malformed_table(block, detail, i))
                return [], messages, blank_finish
        return block, messages, blank_finish
1857
    def isolate_simple_table(self):
        """Extract the text block of a simple table from the input.

        Return (table lines, system messages, blank_finish); the table
        lines are an empty list on error.
        """
        start = self.state_machine.line_offset
        lines = self.state_machine.input_lines
        limit = len(lines) - 1
        # Width of the top border; the bottom border must match it.
        toplen = len(lines[start].strip())
        pattern_match = self.simple_table_border_pat.match
        found = 0           # number of border lines seen after the top
        found_at = None     # offset of the last border line seen
        i = start + 1
        while i <= limit:
            line = lines[i]
            match = pattern_match(line)
            if match:
                if len(line.strip()) != toplen:
                    # Border width mismatch: malformed table.
                    self.state_machine.next_line(i - start)
                    messages = self.malformed_table(
                        lines[start:i+1], 'Bottom border or header rule does '
                        'not match top border.', i-start)
                    return [], messages, i == limit or not lines[i+1].strip()
                found += 1
                found_at = i
                # A second border, the end of input, or a following blank
                # line terminates the table.
                if found == 2 or i == limit or not lines[i+1].strip():
                    end = i
                    break
            i += 1
        else:  # reached end of input_lines without finding the bottom
            details = 'No bottom table border found'
            if found:
                details += ' or no blank line after table bottom'
                self.state_machine.next_line(found_at - start)
                block = lines[start:found_at+1]
            else:
                self.state_machine.next_line(i - start - 1)
                block = lines[start:]
            messages = self.malformed_table(block, details + '.')
            return [], messages, not found
        self.state_machine.next_line(end - start)
        block = lines[start:end+1]
        # for East Asian chars:
        block.pad_double_width(self.double_width_pad_char)
        return block, [], end == limit or not lines[end+1].strip()
1899
1900 def malformed_table(self, block, detail='', offset=0):
1901 block.replace(self.double_width_pad_char, '')
1902 data = '\n'.join(block)
1903 message = 'Malformed table.'
1904 startline = self.state_machine.abs_line_number() - len(block) + 1
1905 if detail:
1906 message += '\n' + detail
1907 error = self.reporter.error(message, nodes.literal_block(data, data),
1908 line=startline+offset)
1909 return [error]
1910
1911 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
1912 colwidths, headrows, bodyrows = tabledata
1913 table = nodes.table()
1914 if widths == 'auto':
1915 table['classes'] += ['colwidths-auto']
1916 elif widths: # "grid" or list of integers
1917 table['classes'] += ['colwidths-given']
1918 tgroup = nodes.tgroup(cols=len(colwidths))
1919 table += tgroup
1920 for colwidth in colwidths:
1921 colspec = nodes.colspec(colwidth=colwidth)
1922 if stub_columns:
1923 colspec.attributes['stub'] = True
1924 stub_columns -= 1
1925 tgroup += colspec
1926 if headrows:
1927 thead = nodes.thead()
1928 tgroup += thead
1929 for row in headrows:
1930 thead += self.build_table_row(row, tableline)
1931 tbody = nodes.tbody()
1932 tgroup += tbody
1933 for row in bodyrows:
1934 tbody += self.build_table_row(row, tableline)
1935 return table
1936
1937 def build_table_row(self, rowdata, tableline):
1938 row = nodes.row()
1939 for cell in rowdata:
1940 if cell is None:
1941 continue
1942 morerows, morecols, offset, cellblock = cell
1943 attributes = {}
1944 if morerows:
1945 attributes['morerows'] = morerows
1946 if morecols:
1947 attributes['morecols'] = morecols
1948 entry = nodes.entry(**attributes)
1949 row += entry
1950 if ''.join(cellblock):
1951 self.nested_parse(cellblock, input_offset=tableline+offset,
1952 node=entry)
1953 return row
1954
    explicit = Struct()
    """Patterns and constants used for explicit markup recognition."""

    # Compiled patterns for the bodies of explicit-markup constructs
    # (hyperlink targets, references, substitutions); escaped text is
    # expected (backslashes converted to null bytes by `escape2null`).
    explicit.patterns = Struct(
          target=re.compile(r"""
                            (
                              _               # anonymous target
                            |               # *OR*
                              (?!_)           # no underscore at the beginning
                              (?P<quote>`?)   # optional open quote
                              (?![ `])        # first char. not space or
                                              # backquote
                              (?P<name>       # reference name
                                .+?
                              )
                              %(non_whitespace_escape_before)s
                              (?P=quote)      # close quote if open quote used
                            )
                            (?<!(?<!\x00):) # no unescaped colon at end
                            %(non_whitespace_escape_before)s
                            [ ]?            # optional space
                            :               # end of reference name
                            ([ ]+|$)        # followed by whitespace
                            """ % vars(Inliner), re.VERBOSE),
          reference=re.compile(r"""
                               (
                                 (?P<simple>%(simplename)s)_
                               |                  # *OR*
                                 `                  # open backquote
                                 (?![ ])            # not space
                                 (?P<phrase>.+?)    # hyperlink phrase
                                 %(non_whitespace_escape_before)s
                                 `_                 # close backquote,
                                                    # reference mark
                               )
                               $                  # end of string
                               """ % vars(Inliner), re.VERBOSE),
          substitution=re.compile(r"""
                                  (
                                    (?![ ])          # first char. not space
                                    (?P<name>.+?)    # substitution text
                                    %(non_whitespace_escape_before)s
                                    \|               # close delimiter
                                  )
                                  ([ ]+|$)           # followed by whitespace
                                  """ % vars(Inliner),
                                  re.VERBOSE),)
2002
    def footnote(self, match):
        """Parse a footnote; return ([footnote node], blank_finish)."""
        src, srcline = self.state_machine.get_source_and_line()
        (indented, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end())
        label = match.group(1)
        name = normalize_name(label)
        footnote = nodes.footnote('\n'.join(indented))
        footnote.source = src
        footnote.line = srcline
        if name[0] == '#':              # auto-numbered
            name = name[1:]             # autonumber label
            footnote['auto'] = 1
            if name:
                footnote['names'].append(name)
            self.document.note_autofootnote(footnote)
        elif name == '*':               # auto-symbol
            name = ''
            footnote['auto'] = '*'
            self.document.note_symbol_footnote(footnote)
        else:                           # manually numbered
            footnote += nodes.label('', label)
            footnote['names'].append(name)
            self.document.note_footnote(footnote)
        if name:
            # Named footnotes are explicit targets.
            self.document.note_explicit_target(footnote, footnote)
        else:
            # Unnamed (symbol / plain auto-numbered): just assign an id.
            self.document.set_id(footnote, footnote)
        if indented:
            self.nested_parse(indented, input_offset=offset, node=footnote)
        else:
            footnote += self.reporter.warning('Footnote content expected.')
        return [footnote], blank_finish
2035
2036 def citation(self, match):
2037 src, srcline = self.state_machine.get_source_and_line()
2038 (indented, indent, offset, blank_finish
2039 ) = self.state_machine.get_first_known_indented(match.end())
2040 label = match.group(1)
2041 name = normalize_name(label)
2042 citation = nodes.citation('\n'.join(indented))
2043 citation.source = src
2044 citation.line = srcline
2045 citation += nodes.label('', label)
2046 citation['names'].append(name)
2047 self.document.note_citation(citation)
2048 self.document.note_explicit_target(citation, citation)
2049 if indented:
2050 self.nested_parse(indented, input_offset=offset, node=citation)
2051 else:
2052 citation += self.reporter.warning('Citation content expected.')
2053 return [citation], blank_finish
2054
    def hyperlink_target(self, match):
        """Parse an explicit hyperlink target.

        Return ([target node], blank_finish).  Raise `MarkupError` if no
        valid target name can be matched.
        """
        pattern = self.explicit.patterns.target
        lineno = self.state_machine.abs_line_number()
        (block, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(
             match.end(), until_blank=True, strip_indent=False)
        blocktext = match.string[:match.end()] + '\n'.join(block)
        # Convert backslash escapes to nulls for safe pattern matching.
        block = [escape2null(line) for line in block]
        escaped = block[0]
        blockindex = 0
        # The target name may be wrapped over several lines: keep
        # appending lines until the target pattern matches.
        while True:
            targetmatch = pattern.match(escaped)
            if targetmatch:
                break
            blockindex += 1
            try:
                escaped += block[blockindex]
            except IndexError:
                raise MarkupError('malformed hyperlink target.')
        del block[:blockindex]
        # Remove the matched name portion from the first remaining line,
        # leaving only the link block (URI or indirect reference).
        block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
        target = self.make_target(block, blocktext, lineno,
                                  targetmatch.group('name'))
        return [target], blank_finish
2079
2080 def make_target(self, block, block_text, lineno, target_name):
2081 target_type, data = self.parse_target(block, block_text, lineno)
2082 if target_type == 'refname':
2083 target = nodes.target(block_text, '', refname=normalize_name(data))
2084 target.indirect_reference_name = data
2085 self.add_target(target_name, '', target, lineno)
2086 self.document.note_indirect_target(target)
2087 return target
2088 elif target_type == 'refuri':
2089 target = nodes.target(block_text, '')
2090 self.add_target(target_name, data, target, lineno)
2091 return target
2092 else:
2093 return data
2094
2095 def parse_target(self, block, block_text, lineno):
2096 """
2097 Determine the type of reference of a target.
2098
2099 :Return: A 2-tuple, one of:
2100
2101 - 'refname' and the indirect reference name
2102 - 'refuri' and the URI
2103 - 'malformed' and a system_message node
2104 """
2105 if block and block[-1].strip()[-1:] == '_': # possible indirect target
2106 reference = ' '.join(line.strip() for line in block)
2107 refname = self.is_reference(reference)
2108 if refname:
2109 return 'refname', refname
2110 ref_parts = split_escaped_whitespace(' '.join(block))
2111 reference = ' '.join(''.join(unescape(part).split())
2112 for part in ref_parts)
2113 return 'refuri', reference
2114
2115 def is_reference(self, reference):
2116 match = self.explicit.patterns.reference.match(
2117 whitespace_normalize_name(reference))
2118 if not match:
2119 return None
2120 return unescape(match.group('simple') or match.group('phrase'))
2121
2122 def add_target(self, targetname, refuri, target, lineno):
2123 target.line = lineno
2124 if targetname:
2125 name = normalize_name(unescape(targetname))
2126 target['names'].append(name)
2127 if refuri:
2128 uri = self.inliner.adjust_uri(refuri)
2129 if uri:
2130 target['refuri'] = uri
2131 else:
2132 raise ApplicationError('problem with URI: %r' % refuri)
2133 self.document.note_explicit_target(target, self.parent)
2134 else: # anonymous target
2135 if refuri:
2136 target['refuri'] = refuri
2137 target['anonymous'] = True
2138 self.document.note_anonymous_target(target)
2139
2140 def substitution_def(self, match):
2141 pattern = self.explicit.patterns.substitution
2142 src, srcline = self.state_machine.get_source_and_line()
2143 (block, indent, offset, blank_finish
2144 ) = self.state_machine.get_first_known_indented(match.end(),
2145 strip_indent=False)
2146 blocktext = (match.string[:match.end()] + '\n'.join(block))
2147 block.disconnect()
2148 escaped = escape2null(block[0].rstrip())
2149 blockindex = 0
2150 while True:
2151 subdefmatch = pattern.match(escaped)
2152 if subdefmatch:
2153 break
2154 blockindex += 1
2155 try:
2156 escaped = escaped + ' ' + escape2null(
2157 block[blockindex].strip())
2158 except IndexError:
2159 raise MarkupError('malformed substitution definition.')
2160 del block[:blockindex] # strip out the substitution marker
2161 start = subdefmatch.end()-len(escaped)-1
2162 block[0] = (block[0].strip() + ' ')[start:-1]
2163 if not block[0]:
2164 del block[0]
2165 offset += 1
2166 while block and not block[-1].strip():
2167 block.pop()
2168 subname = subdefmatch.group('name')
2169 substitution_node = nodes.substitution_definition(blocktext)
2170 substitution_node.source = src
2171 substitution_node.line = srcline
2172 if not block:
2173 msg = self.reporter.warning(
2174 'Substitution definition "%s" missing contents.' % subname,
2175 nodes.literal_block(blocktext, blocktext),
2176 source=src, line=srcline)
2177 return [msg], blank_finish
2178 block[0] = block[0].strip()
2179 substitution_node['names'].append(
2180 nodes.whitespace_normalize_name(subname))
2181 new_abs_offset, blank_finish = self.nested_list_parse(
2182 block, input_offset=offset, node=substitution_node,
2183 initial_state='SubstitutionDef', blank_finish=blank_finish)
2184 i = 0
2185 for node in substitution_node[:]:
2186 if not (isinstance(node, nodes.Inline)
2187 or isinstance(node, nodes.Text)):
2188 self.parent += substitution_node[i]
2189 del substitution_node[i]
2190 else:
2191 i += 1
2192 for node in substitution_node.findall(nodes.Element):
2193 if self.disallowed_inside_substitution_definitions(node):
2194 pformat = nodes.literal_block('', node.pformat().rstrip())
2195 msg = self.reporter.error(
2196 'Substitution definition contains illegal element <%s>:'
2197 % node.tagname,
2198 pformat, nodes.literal_block(blocktext, blocktext),
2199 source=src, line=srcline)
2200 return [msg], blank_finish
2201 if len(substitution_node) == 0:
2202 msg = self.reporter.warning(
2203 'Substitution definition "%s" empty or invalid.' % subname,
2204 nodes.literal_block(blocktext, blocktext),
2205 source=src, line=srcline)
2206 return [msg], blank_finish
2207 self.document.note_substitution_def(
2208 substitution_node, subname, self.parent)
2209 return [substitution_node], blank_finish
2210
2211 def disallowed_inside_substitution_definitions(self, node) -> bool:
2212 if (node['ids']
2213 or isinstance(node, nodes.reference) and node.get('anonymous')
2214 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2215 return True
2216 else:
2217 return False
2218
2219 def directive(self, match, **option_presets):
2220 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2221 type_name = match.group(1)
2222 directive_class, messages = directives.directive(
2223 type_name, self.memo.language, self.document)
2224 self.parent += messages
2225 if directive_class:
2226 return self.run_directive(
2227 directive_class, match, type_name, option_presets)
2228 else:
2229 return self.unknown_directive(type_name)
2230
    def run_directive(self, directive, match, type_name, option_presets):
        """
        Parse a directive then run its directive function.

        Parameters:

        - `directive`: The class implementing the directive.  Must be
          a subclass of `rst.Directive`.

        - `match`: A regular expression match object which matched the first
          line of the directive.

        - `type_name`: The directive name, as used in the source text.

        - `option_presets`: A dictionary of preset options, defaults for the
          directive options.  Currently, only an "alt" option is passed by
          substitution definitions (value: the substitution name), which may
          be used by an embedded image directive.

        Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
        """
        if isinstance(directive, (FunctionType, MethodType)):
            # Backwards compatibility: wrap directive functions in a
            # Directive subclass.
            from docutils.parsers.rst import convert_directive_function
            directive = convert_directive_function(directive)
        lineno = self.state_machine.abs_line_number()
        initial_line_offset = self.state_machine.line_offset
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         strip_top=0)
        # Raw source of the whole directive, for error reports.
        block_text = '\n'.join(self.state_machine.input_lines[
            initial_line_offset : self.state_machine.line_offset + 1])  # noqa: E203,E501
        try:
            arguments, options, content, content_offset = (
                self.parse_directive_block(indented, line_offset,
                                           directive, option_presets))
        except MarkupError as detail:
            error = self.reporter.error(
                'Error in "%s" directive:\n%s.' % (type_name,
                                                   ' '.join(detail.args)),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error], blank_finish
        directive_instance = directive(
            type_name, arguments, options, content, lineno,
            content_offset, block_text, self, self.state_machine)
        try:
            result = directive_instance.run()
        except docutils.parsers.rst.DirectiveError as error:
            # Directives signal problems by raising DirectiveError.
            msg_node = self.reporter.system_message(error.level, error.msg,
                                                    line=lineno)
            msg_node += nodes.literal_block(block_text, block_text)
            result = [msg_node]
        assert isinstance(result, list), \
               'Directive "%s" must return a list of nodes.' % type_name
        for i in range(len(result)):
            assert isinstance(result[i], nodes.Node), \
                   ('Directive "%s" returned non-Node object (index %s): %r'
                    % (type_name, i, result[i]))
        return (result,
                blank_finish or self.state_machine.is_next_line_blank())
2290
    def parse_directive_block(self, indented, line_offset, directive,
                              option_presets):
        """Split a directive's indented block into its parts.

        Return (arguments, options, content, content_offset).  Raise
        `MarkupError` on bad options or disallowed content.
        """
        option_spec = directive.option_spec
        has_content = directive.has_content
        if indented and not indented[0].strip():
            # Discard an initial blank line.
            indented.trim_start()
            line_offset += 1
        while indented and not indented[-1].strip():
            indented.trim_end()
        if indented and (directive.required_arguments
                         or directive.optional_arguments
                         or option_spec):
            # The first "paragraph" (up to a blank line) holds arguments
            # and options; the remainder is content.
            for i, line in enumerate(indented):
                if not line.strip():
                    break
            else:
                i += 1
            arg_block = indented[:i]
            content = indented[i+1:]
            content_offset = line_offset + i + 1
        else:
            content = indented
            content_offset = line_offset
            arg_block = []
        if option_spec:
            options, arg_block = self.parse_directive_options(
                option_presets, option_spec, arg_block)
        else:
            options = {}
        if arg_block and not (directive.required_arguments
                              or directive.optional_arguments):
            # No arguments expected: what remains of the first paragraph
            # is really the start of the content.
            content = arg_block + indented[i:]
            content_offset = line_offset
            arg_block = []
        while content and not content[0].strip():
            content.trim_start()
            content_offset += 1
        if directive.required_arguments or directive.optional_arguments:
            arguments = self.parse_directive_arguments(
                directive, arg_block)
        else:
            arguments = []
        if content and not has_content:
            raise MarkupError('no content permitted')
        return arguments, options, content, content_offset
2336
2337 def parse_directive_options(self, option_presets, option_spec, arg_block):
2338 options = option_presets.copy()
2339 for i, line in enumerate(arg_block):
2340 if re.match(Body.patterns['field_marker'], line):
2341 opt_block = arg_block[i:]
2342 arg_block = arg_block[:i]
2343 break
2344 else:
2345 opt_block = []
2346 if opt_block:
2347 success, data = self.parse_extension_options(option_spec,
2348 opt_block)
2349 if success: # data is a dict of options
2350 options.update(data)
2351 else: # data is an error string
2352 raise MarkupError(data)
2353 return options, arg_block
2354
2355 def parse_directive_arguments(self, directive, arg_block):
2356 required = directive.required_arguments
2357 optional = directive.optional_arguments
2358 arg_text = '\n'.join(arg_block)
2359 arguments = arg_text.split()
2360 if len(arguments) < required:
2361 raise MarkupError('%s argument(s) required, %s supplied'
2362 % (required, len(arguments)))
2363 elif len(arguments) > required + optional:
2364 if directive.final_argument_whitespace:
2365 arguments = arg_text.split(None, required + optional - 1)
2366 else:
2367 raise MarkupError(
2368 'maximum %s argument(s) allowed, %s supplied'
2369 % (required + optional, len(arguments)))
2370 return arguments
2371
    def parse_extension_options(self, option_spec, datalines):
        """
        Parse `datalines` for a field list containing extension options
        matching `option_spec`.

        :Parameters:
            - `option_spec`: a mapping of option name to conversion
              function, which should raise an exception on bad input.
            - `datalines`: a list of input strings.

        :Return:
            - Success value, 1 or 0.
            - An option dictionary on success, an error string on failure.
        """
        node = nodes.field_list()
        # Parse the lines as a field list with the `ExtensionOptions`
        # state (which keeps field bodies as unparsed text).
        newline_offset, blank_finish = self.nested_list_parse(
            datalines, 0, node, initial_state='ExtensionOptions',
            blank_finish=True)
        if newline_offset != len(datalines):  # incomplete parse of block
            return 0, 'invalid option block'
        try:
            options = utils.extract_extension_options(node, option_spec)
        except KeyError as detail:
            return 0, 'unknown option: "%s"' % detail.args[0]
        except (ValueError, TypeError) as detail:
            return 0, 'invalid option value: %s' % ' '.join(detail.args)
        except utils.ExtensionOptionError as detail:
            return 0, 'invalid option data: %s' % ' '.join(detail.args)
        if blank_finish:
            return 1, options
        else:
            return 0, 'option data incompletely parsed'
2404
2405 def unknown_directive(self, type_name):
2406 lineno = self.state_machine.abs_line_number()
2407 (indented, indent, offset, blank_finish
2408 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2409 text = '\n'.join(indented)
2410 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2411 nodes.literal_block(text, text),
2412 line=lineno)
2413 return [error], blank_finish
2414
2415 def comment(self, match):
2416 if self.state_machine.is_next_line_blank():
2417 first_comment_line = match.string[match.end():]
2418 if not first_comment_line.strip(): # empty comment
2419 return [nodes.comment()], True # "A tiny but practical wart."
2420 if first_comment_line.startswith('end of inclusion from "'):
2421 # cf. parsers.rst.directives.misc.Include
2422 self.document.include_log.pop()
2423 return [], True
2424 (indented, indent, offset, blank_finish
2425 ) = self.state_machine.get_first_known_indented(match.end())
2426 while indented and not indented[-1].strip():
2427 indented.trim_end()
2428 text = '\n'.join(indented)
2429 return [nodes.comment(text, text)], blank_finish
2430
    # (method, pattern) pairs, tried in order by `explicit_construct`;
    # each pattern matches the start string of one explicit construct.
    explicit.constructs = [
          (footnote,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      \[
                      (                 # footnote label:
                          [0-9]+          # manually numbered footnote
                        |               # *OR*
                          \#              # anonymous auto-numbered footnote
                        |               # *OR*
                          \#%s            # auto-number ed?) footnote label
                        |               # *OR*
                          \*              # auto-symbol footnote
                      )
                      \]
                      ([ ]+|$)          # whitespace or end of line
                      """ % Inliner.simplename, re.VERBOSE)),
          (citation,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      \[(%s)\]          # citation label
                      ([ ]+|$)          # whitespace or end of line
                      """ % Inliner.simplename, re.VERBOSE)),
          (hyperlink_target,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      _                 # target indicator
                      (?![ ]|$)         # first char. not space or EOL
                      """, re.VERBOSE)),
          (substitution_def,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      \|                # substitution indicator
                      (?![ ]|$)         # first char. not space or EOL
                      """, re.VERBOSE)),
          (directive,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      (%s)              # directive name
                      [ ]?              # optional space
                      ::                # directive delimiter
                      ([ ]+|$)          # whitespace or end of line
                      """ % Inliner.simplename, re.VERBOSE))]
2474
2475 def explicit_markup(self, match, context, next_state):
2476 """Footnotes, hyperlink targets, directives, comments."""
2477 nodelist, blank_finish = self.explicit_construct(match)
2478 self.parent += nodelist
2479 self.explicit_list(blank_finish)
2480 return [], next_state, []
2481
2482 def explicit_construct(self, match):
2483 """Determine which explicit construct this is, parse & return it."""
2484 errors = []
2485 for method, pattern in self.explicit.constructs:
2486 expmatch = pattern.match(match.string)
2487 if expmatch:
2488 try:
2489 return method(self, expmatch)
2490 except MarkupError as error:
2491 lineno = self.state_machine.abs_line_number()
2492 message = ' '.join(error.args)
2493 errors.append(self.reporter.warning(message, line=lineno))
2494 break
2495 nodelist, blank_finish = self.comment(match)
2496 return nodelist + errors, blank_finish
2497
2498 def explicit_list(self, blank_finish) -> None:
2499 """
2500 Create a nested state machine for a series of explicit markup
2501 constructs (including anonymous hyperlink targets).
2502 """
2503 offset = self.state_machine.line_offset + 1 # next line
2504 newline_offset, blank_finish = self.nested_list_parse(
2505 self.state_machine.input_lines[offset:],
2506 input_offset=self.state_machine.abs_line_offset() + 1,
2507 node=self.parent, initial_state='Explicit',
2508 blank_finish=blank_finish)
2509 self.goto_line(newline_offset)
2510 if not blank_finish:
2511 self.parent += self.unindent_warning('Explicit markup')
2512
2513 def anonymous(self, match, context, next_state):
2514 """Anonymous hyperlink targets."""
2515 nodelist, blank_finish = self.anonymous_target(match)
2516 self.parent += nodelist
2517 self.explicit_list(blank_finish)
2518 return [], next_state, []
2519
2520 def anonymous_target(self, match):
2521 lineno = self.state_machine.abs_line_number()
2522 (block, indent, offset, blank_finish
2523 ) = self.state_machine.get_first_known_indented(match.end(),
2524 until_blank=True)
2525 blocktext = match.string[:match.end()] + '\n'.join(block)
2526 block = [escape2null(line) for line in block]
2527 target = self.make_target(block, blocktext, lineno, '')
2528 return [target], blank_finish
2529
    def line(self, match, context, next_state):
        """Section title overline or transition marker."""
        if self.state_machine.match_titles:
            # Defer to the `Line` state, which inspects the next line(s)
            # to distinguish title overline from transition.
            return [match.string], 'Line', []
        elif match.string.strip() == '::':
            # A bare "::": reclassify as ordinary text (literal block
            # introducer).
            raise statemachine.TransitionCorrection('text')
        elif len(match.string.strip()) < 4:
            msg = self.reporter.info(
                'Unexpected possible title overline or transition.\n'
                "Treating it as ordinary text because it's so short.",
                line=self.state_machine.abs_line_number())
            self.parent += msg
            raise statemachine.TransitionCorrection('text')
        else:
            # Titles are not allowed here (e.g. inside a directive).
            blocktext = self.state_machine.line
            msg = self.reporter.error(
                'Unexpected section title or transition.',
                nodes.literal_block(blocktext, blocktext),
                line=self.state_machine.abs_line_number())
            self.parent += msg
            return [], next_state, []
2551
    def text(self, match, context, next_state):
        """Titles, definition lists, paragraphs."""
        # Pass the raw line to the `Text` state, which classifies this
        # line by examining the next one.
        return [match.string], 'Text', []
2555
2556
class RFC2822Body(Body):

    """
    RFC2822 headers are only valid as the first constructs in documents.  As
    soon as anything else appears, the `Body` state should take over.
    """

    patterns = Body.patterns.copy()  # can't modify the original
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    # All transitions hand over to `Body`; 'rfc2822' is inserted
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    initial_transitions.insert(-1, ('rfc2822', 'Body'))  # just before 'text'

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        fieldlist = nodes.field_list(classes=['rfc2822'])
        self.parent += fieldlist
        field, blank_finish = self.rfc2822_field(match)
        fieldlist += field
        # Parse subsequent fields with the `RFC2822List` state.
        offset = self.state_machine.line_offset + 1   # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=fieldlist, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """Parse one field; return (field node, blank_finish)."""
        # Field name is everything up to the first colon.
        name = match.string[:match.string.find(':')]
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         until_blank=True)
        fieldnode = nodes.field()
        fieldnode += nodes.field_name(name, name)
        fieldbody = nodes.field_body('\n'.join(indented))
        fieldnode += fieldbody
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=fieldbody)
        return fieldnode, blank_finish
2601
2602
class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.  Compound
    elements are lists and list-like constructs.

    All transition methods are disabled (redefined as `invalid_input`).
    Override individual methods in subclasses to re-enable.

    For example, once an initial bullet list item, say, is recognized, the
    `BulletList` subclass takes over, with a "bullet_list" node as its
    container.  Upon encountering the initial bullet list item, `Body.bullet`
    calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
    starts up a nested parsing session with `BulletList` as the initial state.
    Only the ``bullet`` transition method is enabled in `BulletList`; as long
    as only bullet list items are encountered, they are parsed and inserted
    into the container.  The first construct which is *not* a bullet list item
    triggers the `invalid_input` method, which ends the nested parse and
    closes the container.  `BulletList` needs to recognize input that is
    invalid in the context of a bullet list, which means everything *other
    than* bullet list items, so it inherits the transition list created in
    `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        # Back up one line so the parent state machine can reassess it.
        self.state_machine.previous_line()
        raise EOFError

    # Disable all of `Body`'s transitions; subclasses selectively
    # re-enable the one(s) valid inside their compound element.
    indent = invalid_input
    bullet = invalid_input
    enumerator = invalid_input
    field_marker = invalid_input
    option_marker = invalid_input
    doctest = invalid_input
    line_block = invalid_input
    grid_table_top = invalid_input
    simple_table_top = invalid_input
    explicit_markup = invalid_input
    anonymous = invalid_input
    line = invalid_input
    text = invalid_input
2645
2646
class BulletList(SpecializedBody):

    """Continuation state: second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Append one more item to the enclosing bullet_list."""
        if self.parent['bullet'] != match.string[0]:
            # A different bullet character starts a *new* list, so this
            # line does not belong here; end the nested parse.
            self.invalid_input()
        item, blank_finish = self.list_item(match.end())
        self.parent += item
        self.blank_finish = blank_finish
        return [], next_state, []
2660
2661
class DefinitionList(SpecializedBody):

    """Continuation state: second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """A text line may be the term of another definition list item."""
        # Hand the line to the `Definition` state, which checks whether the
        # following line is indented (a real definition) or not.
        new_context = [match.string]
        return new_context, 'Definition', []
2669
2670
class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """Enumerated list item."""
        format, sequence, text, ordinal = self.parse_enumerator(
            match, self.parent['enumtype'])
        # The item continues this list only if its format matches and its
        # sequence is auto ("#") or consistent with the list's type and the
        # next expected ordinal.
        # NOTE: preserve the short-circuit evaluation order;
        # `is_enumerated_list_item` is deliberately tested last.
        if (format != self.format
            or (sequence != '#' and (sequence != self.parent['enumtype']
                                     or self.auto
                                     or ordinal != (self.lastordinal + 1)))
            or not self.is_enumerated_list_item(ordinal, sequence, format)):
            # different enumeration: new list
            self.invalid_input()
        if sequence == '#':
            # One auto-numbered item switches the whole list to auto mode.
            self.auto = 1
        listitem, blank_finish = self.list_item(match.end())
        self.parent += listitem
        self.blank_finish = blank_finish
        self.lastordinal = ordinal
        return [], next_state, []
2693
2694
class FieldList(SpecializedBody):

    """Continuation state: second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Append one more field to the enclosing field_list."""
        new_field, blank_finish = self.field(match)
        self.parent += new_field
        self.blank_finish = blank_finish
        return [], next_state, []
2705
2706
class OptionList(SpecializedBody):

    """Continuation state: second and subsequent option_list items."""

    def option_marker(self, match, context, next_state):
        """Append one more option_list_item to the enclosing option_list."""
        try:
            item, blank_finish = self.option_list_item(match)
        except MarkupError:
            # A malformed option line is not part of this list after all;
            # `invalid_input` raises EOFError, ending the nested parse.
            self.invalid_input()
        self.parent += item
        self.blank_finish = blank_finish
        return [], next_state, []
2720
2721
class RFC2822List(SpecializedBody, RFC2822Body):

    """Continuation state: second and subsequent RFC2822-style fields."""

    # Reuse the transition machinery defined on `RFC2822Body`.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """Append one more RFC2822-style field to the field_list."""
        new_field, blank_finish = self.rfc2822_field(match)
        self.parent += new_field
        self.blank_finish = blank_finish
        return [], 'RFC2822List', []

    # A blank line ends the RFC2822 header block.
    blank = SpecializedBody.invalid_input
2737
2738
class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node) -> None:
        """Override `Body.parse_field_body` for simpler parsing."""
        # Collect runs of non-blank lines; each run becomes one plain
        # paragraph (no inline markup parsing).  The trailing sentinel ''
        # guarantees the final run is flushed.
        buffered = []
        for current in [*indented, '']:
            if current.strip():
                buffered.append(current)
            elif buffered:
                paragraph_text = '\n'.join(buffered)
                node += nodes.paragraph(paragraph_text, paragraph_text)
                buffered = []
2757
2758
class LineBlock(SpecializedBody):

    """Continuation state: second and subsequent lines of a line_block."""

    # A blank line terminates the line block.
    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """Append one more line to the enclosing line_block."""
        current_lineno = self.state_machine.abs_line_number()
        new_line, messages, blank_finish = self.line_block_line(
            match, current_lineno)
        self.parent += new_line
        # System messages are attached outside the line_block itself.
        self.parent.parent += messages
        self.blank_finish = blank_finish
        return [], next_state, []
2773
2774
class Explicit(SpecializedBody):

    """Continuation state: second and subsequent explicit markup constructs."""

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments."""
        new_nodes, blank_finish = self.explicit_construct(match)
        self.parent += new_nodes
        self.blank_finish = blank_finish
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets."""
        new_nodes, blank_finish = self.anonymous_target(match)
        self.parent += new_nodes
        self.blank_finish = blank_finish
        return [], next_state, []

    # A blank line ends the series of explicit markup constructs.
    blank = SpecializedBody.invalid_input
2794
2795
class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
        'embedded_directive': re.compile(r'(%s)::( +|$)'
                                         % Inliner.simplename),
        'text': r''}
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """Run the directive embedded in the substitution definition."""
        directive_nodes, blank_finish = self.directive(
            match, alt=self.parent['names'][0])
        self.parent += directive_nodes
        if not self.state_machine.at_eof():
            self.blank_finish = blank_finish
        # The substitution definition is complete; end this nested parse.
        raise EOFError

    def text(self, match, context, next_state):
        """Ordinary text: no embedded directive; end this nested parse."""
        if not self.state_machine.at_eof():
            self.blank_finish = self.state_machine.is_next_line_blank()
        raise EOFError
2820
2821
class Text(RSTState):

    """
    Classifier of second line of a text block.

    Could be a paragraph, a definition list item, or a title.
    """

    patterns = {'underline': Body.patterns['line'],
                'text': r''}
    initial_transitions = [('underline', 'Body'), ('text', 'Body')]

    def blank(self, match, context, next_state):
        """End of paragraph."""
        # NOTE: self.paragraph returns [node, system_message(s)], literalnext
        paragraph, literalnext = self.paragraph(
            context, self.state_machine.abs_line_number() - 1)
        self.parent += paragraph
        if literalnext:
            # The paragraph ended with "::": an indented or quoted literal
            # block follows.
            self.parent += self.literal_block()
        return [], 'Body', []

    def eof(self, context):
        """End of input: flush any pending paragraph text."""
        if context:
            self.blank(None, context, None)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        dl = nodes.definition_list()
        # the definition list starts on the line before the indent:
        lineno = self.state_machine.abs_line_number() - 1
        dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
        dl_item, blank_finish = self.definition_list_item(context)
        dl += dl_item
        self.parent += dl
        offset = self.state_machine.line_offset + 1   # next line
        # Parse any further definition list items with the specialized
        # `DefinitionList` state.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=dl, initial_state='DefinitionList',
            blank_finish=blank_finish, blank_finish_state='Definition')
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Definition list')
        return [], 'Body', []

    def underline(self, match, context, next_state):
        """Section title."""
        lineno = self.state_machine.abs_line_number()
        title = context[0].rstrip()
        underline = match.string.rstrip()
        source = title + '\n' + underline
        messages = []
        if column_width(title) > len(underline):
            if len(underline) < 4:
                if self.state_machine.match_titles:
                    msg = self.reporter.info(
                        'Possible title underline, too short for the title.\n'
                        "Treating it as ordinary text because it's so short.",
                        line=lineno)
                    self.parent += msg
                # Re-dispatch this line through the "text" transition.
                raise statemachine.TransitionCorrection('text')
            else:
                blocktext = context[0] + '\n' + self.state_machine.line
                msg = self.reporter.warning(
                    'Title underline too short.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                messages.append(msg)
        if not self.state_machine.match_titles:
            # Section titles are not allowed in this context (e.g. inside
            # a nested parse): report an error instead of sectioning.
            blocktext = context[0] + '\n' + self.state_machine.line
            # We need get_source_and_line() here to report correctly
            src, srcline = self.state_machine.get_source_and_line()
            # TODO: why is abs_line_number() == srcline+1
            # if the error is in a table (try with test_tables.py)?
            # print("get_source_and_line", srcline)
            # print("abs_line_number", self.state_machine.abs_line_number())
            msg = self.reporter.error(
                'Unexpected section title.',
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            self.parent += messages
            self.parent += msg
            return [], next_state, []
        # The underline character identifies the section style/level.
        style = underline[0]
        context[:] = []
        self.section(title, source, style, lineno - 1, messages)
        return [], next_state, []

    def text(self, match, context, next_state):
        """Paragraph."""
        startline = self.state_machine.abs_line_number() - 1
        msg = None
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            block, src, srcline = err.args
            msg = self.reporter.error('Unexpected indentation.',
                                      source=src, line=srcline)
        # `context` holds the paragraph's first line; `block` the rest.
        lines = context + list(block)
        paragraph, literalnext = self.paragraph(lines, startline)
        self.parent += paragraph
        # NOTE(review): msg may still be None here — presumably a no-op
        # when added to the parent node; confirm against nodes.Element.
        self.parent += msg
        if literalnext:
            try:
                self.state_machine.next_line()
            except EOFError:
                pass
            self.parent += self.literal_block()
        return [], next_state, []

    def literal_block(self):
        """Return a list of nodes."""
        (indented, indent, offset, blank_finish
         ) = self.state_machine.get_indented()
        # Strip trailing blank lines from the indented block.
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            # No indented text follows "::": try a quoted literal block.
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        literal_block = nodes.literal_block(data, data)
        (literal_block.source,
         literal_block.line) = self.state_machine.get_source_and_line(offset+1)
        nodelist = [literal_block]
        if not blank_finish:
            nodelist.append(self.unindent_warning('Literal block'))
        return nodelist

    def quoted_literal_block(self):
        """Parse a quoted (unindented) literal block; return its nodes."""
        abs_line_offset = self.state_machine.abs_line_offset()
        offset = self.state_machine.line_offset
        parent_node = nodes.Element()
        # Run a dedicated nested parse with only the special-purpose
        # `QuotedLiteralBlock` state enabled.
        new_abs_offset = self.nested_parse(
            self.state_machine.input_lines[offset:],
            input_offset=abs_line_offset, node=parent_node, match_titles=False,
            state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
                                  'initial_state': 'QuotedLiteralBlock'})
        self.goto_line(new_abs_offset)
        return parent_node.children

    def definition_list_item(self, termline):
        """Parse one definition_list_item; return (node, blank_finish)."""
        # the parser is already on the second (indented) line:
        dd_lineno = self.state_machine.abs_line_number()
        dt_lineno = dd_lineno - 1
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_indented()
        dl_item = nodes.definition_list_item(
            '\n'.join(termline + list(indented)))
        (dl_item.source,
         dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
        dt_nodes, messages = self.term(termline, dt_lineno)
        dl_item += dt_nodes
        dd = nodes.definition('', *messages)
        dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
        dl_item += dd
        if termline[0][-2:] == '::':
            # A "::"-terminated term is probably a botched literal block.
            dd += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.',
                line=dd_lineno)
        # TODO: drop a definition if it is an empty comment to allow
        # definition list items with several terms?
        # https://sourceforge.net/p/docutils/feature-requests/60/
        self.nested_parse(indented, input_offset=line_offset, node=dd)
        return dl_item, blank_finish

    # Delimiter separating a term from its classifier(s): " : ".
    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """Return a definition_list's term and optional classifiers."""
        assert len(lines) == 1
        text_nodes, messages = self.inline_text(lines[0], lineno)
        dt = nodes.term(lines[0])
        dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
        node_list = [dt]
        for i in range(len(text_nodes)):
            node = text_nodes[i]
            if isinstance(node, nodes.Text):
                # Split plain text on " : " delimiters; everything after
                # the first part becomes classifier elements.
                parts = self.classifier_delimiter.split(node)
                if len(parts) == 1:
                    node_list[-1] += node
                else:
                    text = parts[0].rstrip()
                    textnode = nodes.Text(text)
                    node_list[-1] += textnode
                    node_list += [nodes.classifier(unescape(part, True), part)
                                  for part in parts[1:]]
            else:
                # Inline elements (from inline markup) are appended as-is.
                node_list[-1] += node
        return node_list, messages
3013
3014
class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled. Override individual methods in
    subclasses to re-enable.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    # Disable all `Text` transitions; subclasses selectively re-enable.
    blank = invalid_input
    indent = invalid_input
    underline = invalid_input
    text = invalid_input
3036
3037
class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition after all: rewind so the parent reparses."""
        self.state_machine.previous_line(2)  # so parent SM can reassess
        return []

    def indent(self, match, context, next_state):
        """An indented second line confirms a definition list item."""
        item, blank_finish = self.definition_list_item(context)
        self.parent += item
        self.blank_finish = blank_finish
        return [], 'DefinitionList', []
3053
3054
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1  # ignored, will be removed in Docutils 2.0.

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if len(marker) < 4:
            # Too short for a transition; reparse as ordinary text
            # (raises StateCorrection).
            self.state_correction(context)
        src, srcline = self.state_machine.get_source_and_line()
        # lineno = self.state_machine.abs_line_number() - 1
        transition = nodes.transition(rawsource=context[0])
        transition.source = src
        transition.line = srcline - 1
        # transition.line = lineno
        self.parent += transition
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            # Too short for a transition; reparse as ordinary text.
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """Potential over- & underlined title."""
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            # Overline + title at end of input: no underline possible.
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                # short_overline() raises StateCorrection (reparse as text).
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.error(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            # Third line is not a valid underline.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.error(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            # Overline and underline must be identical.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.error(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            # Title wider than its overline: warning only (still a title).
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno)
                messages.append(msg)
        # Overlined titles use a two-character style key.
        style = (overline[0], underline[0])
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        return [], 'Body', []

    indent = text  # indented title

    def underline(self, match, context, next_state):
        """A second line of punctuation: invalid title or transition."""
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1) -> None:
        """Report a too-short overline, then reparse it as ordinary text."""
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """Rewind `lines` lines and reparse via Body's "text" transition."""
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
3176
3177
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose. Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False) -> None:
        RSTState.__init__(self, state_machine, debug)
        # System messages accumulated while collecting the block.
        self.messages = []
        # Absolute line number of the first quoted line (set by
        # `initial_quoted`).
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        """A blank line ends the quoted block (once one has started)."""
        if context:
            raise EOFError
        else:
            return context, next_state, []

    def eof(self, context):
        """Emit the collected literal block, or warn if none was found."""
        if context:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            literal_block = nodes.literal_block(text, text)
            literal_block.source = src
            literal_block.line = srcline
            self.parent += literal_block
        else:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number()
                )  # src not available, statemachine.input_lines is empty
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        """Unexpected indentation ends the block with an error."""
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        self.messages.append(
            self.reporter.error('Unexpected indentation.',
                                line=self.state_machine.abs_line_number()))
        # Rewind so the parent state machine reprocesses this line.
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote = match.string[0]
        pattern = re.compile(re.escape(quote))
        # New transition matches consistent quotes only:
        self.add_transition('quoted',
                            (pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        """A line starting with a different character ends the block."""
        if context:
            self.messages.append(
                self.reporter.error('Inconsistent literal block quoting.',
                                    line=self.state_machine.abs_line_number()))
            self.state_machine.previous_line()
        raise EOFError
3251
3252
# All State classes needed by a standard `RSTStateMachine` run.
# (`QuotedLiteralBlock` is deliberately excluded: per its docstring it is
# special-purpose and not for inclusion here.)
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""