1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
6This is the ``docutils.parsers.rst.states`` module, the core of
7the reStructuredText parser. It defines the following:
8
9:Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
- `BulletList`: Second and subsequent bullet_list list_items.
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: obsolete, use `types.SimpleNamespace`.
30
31:Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
35
36:Functions:
- `escape2null()`: Return a string with escape-backslashes converted to nulls.
- `unescape()`: Return a string with nulls removed or restored to backslashes.
39
40:Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
42
43Parser Overview
44===============
45
46The reStructuredText parser is implemented as a recursive state machine,
47examining its input one line at a time. To understand how the parser works,
48please first become familiar with the `docutils.statemachine` module. In the
49description below, references are made to classes defined in this module;
50please see the individual classes for details.
51
52Parsing proceeds as follows:
53
541. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
58
592. The method associated with the matched transition pattern is called.
60
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
65
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
70
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
73
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
83
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
86
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
90
- Indented: The element is a definition list item, and parsing proceeds
  similarly to step 2.B, using the `Definition` state.
93
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
97
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
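
For orientation, a minimal sketch that drives this parser through the
public `docutils.parsers.rst.Parser` wrapper (the document name and sample
source are placeholders)::

    from docutils.frontend import get_default_settings
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    parser = Parser()
    settings = get_default_settings(Parser)
    document = new_document('<sketch>', settings)
    parser.parse('A *short* test document.', document)
    print(document.pformat())

`Parser.parse()` creates an `RSTStateMachine` and calls its `run()` method
with the input lines and the (initially empty) `document` node.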
101"""
102
103from __future__ import annotations
104
105__docformat__ = 'reStructuredText'
106
107import copy
108import re
109from types import FunctionType, MethodType
110from types import SimpleNamespace as Struct
111
112from docutils import nodes, statemachine, utils
113from docutils import ApplicationError, DataError
114from docutils.statemachine import StateMachineWS, StateWS
115from docutils.nodes import fully_normalize_name as normalize_name
116from docutils.nodes import unescape, whitespace_normalize_name
117import docutils.parsers.rst
118from docutils.parsers.rst import directives, languages, tableparser, roles
119from docutils.utils import escape2null, column_width
120from docutils.utils import punctuation_chars, urischemes
121from docutils.utils import split_escaped_whitespace
122from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
123 RomanNumeral)
124
125TYPE_CHECKING = False
126if TYPE_CHECKING:
127 from docutils.statemachine import StringList
128
129
130class MarkupError(DataError): pass
131class UnknownInterpretedRoleError(DataError): pass
132class InterpretedRoleNotImplementedError(DataError): pass
133class ParserError(ApplicationError): pass
134class MarkupMismatch(Exception): pass
135
136
137class RSTStateMachine(StateMachineWS):
138
139 """
140 reStructuredText's master StateMachine.
141
142 The entry point to reStructuredText parsing is the `run()` method.
143 """
144
145 def run(self, input_lines, document, input_offset=0, match_titles=True,
146 inliner=None) -> None:
147 """
148 Parse `input_lines` and modify the `document` node in place.
149
150 Extend `StateMachineWS.run()`: set up parse-global data and
151 run the StateMachine.
152 """
153 self.language = languages.get_language(
154 document.settings.language_code, document.reporter)
155 self.match_titles = match_titles
156 if inliner is None:
157 inliner = Inliner()
158 inliner.init_customizations(document.settings)
159 # A collection of objects to share with nested parsers.
160 # The attributes `reporter`, `section_level`, and
161 # `section_bubble_up_kludge` will be removed in Docutils 2.0
162 self.memo = Struct(document=document,
163 reporter=document.reporter, # ignored
164 language=self.language,
165 title_styles=[],
166 section_level=0, # ignored
167 section_bubble_up_kludge=False, # ignored
168 inliner=inliner)
169 self.document = document
170 self.attach_observer(document.note_source)
171 self.reporter = self.document.reporter
172 self.node = document
173 results = StateMachineWS.run(self, input_lines, input_offset,
174 input_source=document['source'])
175 assert results == [], 'RSTStateMachine.run() results should be empty!'
176 self.node = self.memo = None # remove unneeded references
177
178
179class NestedStateMachine(StateMachineWS):
180 """
181 StateMachine run from within other StateMachine runs, to parse nested
182 document structures.
183 """
184
185 def run(self, input_lines, input_offset, memo, node, match_titles=True):
186 """
187 Parse `input_lines` and populate `node`.
188
189 Use a separate "title style hierarchy" (changed in Docutils 0.23).
190
191 Extend `StateMachineWS.run()`: set up document-wide data.
192 """
193 self.match_titles = match_titles
194 self.memo = copy.copy(memo)
195 self.document = memo.document
196 self.attach_observer(self.document.note_source)
197 self.language = memo.language
198 self.reporter = self.document.reporter
199 self.node = node
200 if match_titles:
201 # Use a separate section title style hierarchy;
202 # ensure all sections in the `input_lines` are treated as
203 # subsections of the current section by blocking lower
204 # section levels with a style that is impossible in rST:
205 self.memo.title_styles = ['x'] * len(node.section_hierarchy())
206 results = StateMachineWS.run(self, input_lines, input_offset)
207 assert results == [], ('NestedStateMachine.run() results should be '
208 'empty!')
209 return results
210
211
212class RSTState(StateWS):
213
214 """
215 reStructuredText State superclass.
216
217 Contains methods used by all State subclasses.
218 """
219
220 nested_sm = NestedStateMachine
221 nested_sm_cache = []
222
223 def __init__(self, state_machine, debug=False) -> None:
224 self.nested_sm_kwargs = {'state_classes': state_classes,
225 'initial_state': 'Body'}
226 StateWS.__init__(self, state_machine, debug)
227
228 def runtime_init(self) -> None:
229 StateWS.runtime_init(self)
230 memo = self.state_machine.memo
231 self.memo = memo
232 self.document = memo.document
233 self.inliner = memo.inliner
234 self.reporter = self.document.reporter
235 # enable the reporter to determine source and source-line
236 if not hasattr(self.reporter, 'get_source_and_line'):
237 self.reporter.get_source_and_line = self.state_machine.get_source_and_line # noqa:E501
238
239 @property
240 def parent(self) -> nodes.Element | None:
241 return self.state_machine.node
242
243 @parent.setter
244 def parent(self, value: nodes.Element):
245 self.state_machine.node = value
246
247 def goto_line(self, abs_line_offset) -> None:
248 """
249 Jump to input line `abs_line_offset`, ignoring jumps past the end.
250 """
251 try:
252 self.state_machine.goto_line(abs_line_offset)
253 except EOFError:
254 pass
255
256 def no_match(self, context, transitions):
257 """
258 Override `StateWS.no_match` to generate a system message.
259
260 This code should never be run.
261 """
262 self.reporter.severe(
263 'Internal error: no transition pattern match. State: "%s"; '
264 'transitions: %s; context: %s; current line: %r.'
265 % (self.__class__.__name__, transitions, context,
266 self.state_machine.line))
267 return context, None, []
268
269 def bof(self, context):
270 """Called at beginning of file."""
271 return [], []
272
273 def nested_parse(self,
274 block: StringList,
275 input_offset: int,
276 node: nodes.Element,
277 match_titles: bool = False,
278 state_machine_class: StateMachineWS|None = None,
279 state_machine_kwargs: dict|None = None
280 ) -> int:
281 """
282 Parse the input `block` with a nested state-machine rooted at `node`.
283
284 :block:
285 reStructuredText source extract.
286 :input_offset:
287 Line number at start of the block.
288 :node:
289 Base node. All generated nodes will be appended to this node.
290 :match_titles:
291 Allow section titles?
292 A separate section title style hierarchy is used for the nested
293 parsing (all sections are subsections of the current section).
294 The calling code should check whether sections are valid
295 children of the base node and move them or warn otherwise.
296 :state_machine_class:
297 Default: `NestedStateMachine`.
298 :state_machine_kwargs:
299 Keyword arguments for the state-machine instantiation.
300 Default: `self.nested_sm_kwargs`.
301
302 Create a new state-machine instance if required.
303 Return new offset.
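
A typical call from a directive's ``run()`` method, where ``self.state``
is an `RSTState` instance (the container node is a placeholder)::

    node = nodes.container()
    self.state.nested_parse(self.content, self.content_offset, node)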
304 """
305 use_default = 0
306 if state_machine_class is None:
307 state_machine_class = self.nested_sm
308 use_default += 1
309 if state_machine_kwargs is None:
310 state_machine_kwargs = self.nested_sm_kwargs
311 use_default += 1
312 state_machine = None
313 if use_default == 2:
314 try:
315 state_machine = self.nested_sm_cache.pop()
316 except IndexError:
317 pass
318 if not state_machine:
319 state_machine = state_machine_class(
320 debug=self.debug,
321 parent_state_machine=self.state_machine,
322 **state_machine_kwargs)
323 # run the statemachine and populate `node`:
324 block_length = len(block)
325 state_machine.run(block, input_offset, memo=self.memo,
326 node=node, match_titles=match_titles)
327 # clean up
328 if use_default == 2:
329 self.nested_sm_cache.append(state_machine)
330 else:
331 state_machine.unlink()
332 new_offset = state_machine.abs_line_offset()
333 # No `block.parent` implies disconnected -- lines aren't in sync:
334 if block.parent and (len(block) - block_length) != 0:
335 # Adjustment for block if modified in nested parse:
336 self.state_machine.next_line(len(block) - block_length)
337 return new_offset
338
339 def nested_list_parse(self, block, input_offset, node, initial_state,
340 blank_finish,
341 blank_finish_state=None,
342 extra_settings={},
343 match_titles=False,
344 state_machine_class=None,
345 state_machine_kwargs=None):
346 """
347 Parse the input `block` with a nested state-machine rooted at `node`.
348
349 Create a new StateMachine rooted at `node` and run it over the
350 input `block` (see also `nested_parse()`).
351 Also keep track of optional intermediate blank lines and the
352 required final one.
353
354 Return new offset and a boolean indicating whether there was a
355 blank final line.
356 """
357 if state_machine_class is None:
358 state_machine_class = self.nested_sm
359 if state_machine_kwargs is None:
360 state_machine_kwargs = self.nested_sm_kwargs.copy()
361 state_machine_kwargs['initial_state'] = initial_state
362 state_machine = state_machine_class(
363 debug=self.debug,
364 parent_state_machine=self.state_machine,
365 **state_machine_kwargs)
366 if blank_finish_state is None:
367 blank_finish_state = initial_state
368 state_machine.states[blank_finish_state].blank_finish = blank_finish
369 for key, value in extra_settings.items():
370 setattr(state_machine.states[initial_state], key, value)
371 state_machine.run(block, input_offset, memo=self.memo,
372 node=node, match_titles=match_titles)
373 blank_finish = state_machine.states[blank_finish_state].blank_finish
374 state_machine.unlink()
375 return state_machine.abs_line_offset(), blank_finish
376
377 def section(self, title, source, style, lineno, messages) -> None:
378 """Check for a valid subsection and create one if it checks out."""
379 if self.check_subsection(source, style, lineno):
380 self.new_subsection(title, lineno, messages)
381
382 def check_subsection(self, source, style, lineno) -> bool:
383 """
384 Check for a valid subsection header. Update section data in `memo`.
385
386 When a new section is reached that isn't a subsection of the current
387 section, set `self.parent` to the new section's parent section
388 (or the root node if the new section is a top-level section).
389 """
390 title_styles = self.memo.title_styles
391 parent_sections = self.parent.section_hierarchy()
392 # current section level: (0 root, 1 section, 2 subsection, ...)
393 oldlevel = len(parent_sections)
394 # new section level:
395 try: # check for existing title style
396 newlevel = title_styles.index(style) + 1
397 except ValueError: # new title style
398 newlevel = len(title_styles) + 1
399 # The new level must not be deeper than an immediate child
400 # of the current level:
401 if newlevel > oldlevel + 1:
402 styles = ' '.join('/'.join(style) for style in title_styles)
403 self.parent += self.reporter.error(
404 'Inconsistent title style:'
405 f' skip from level {oldlevel} to {newlevel}.',
406 nodes.literal_block('', source),
407 nodes.paragraph('', f'Established title styles: {styles}'),
408 line=lineno)
409 return False
410 # Update parent state:
411 if newlevel > len(title_styles):
412 title_styles.append(style)
413 self.memo.section_level = newlevel
414 if newlevel <= oldlevel:
415 # new section is sibling or higher up in the section hierarchy
416 self.parent = parent_sections[newlevel-1].parent
417 return True
418
419 def title_inconsistent(self, sourcetext, lineno):
420 # Ignored. Will be removed in Docutils 2.0.
421 error = self.reporter.error(
422 'Title level inconsistent:', nodes.literal_block('', sourcetext),
423 line=lineno)
424 return error
425
426 def new_subsection(self, title, lineno, messages):
427 """Append new subsection to document tree."""
428 section_node = nodes.section()
429 self.parent += section_node
430 textnodes, title_messages = self.inline_text(title, lineno)
431 titlenode = nodes.title(title, '', *textnodes)
432 name = normalize_name(titlenode.astext())
433 section_node['names'].append(name)
434 section_node += titlenode
435 section_node += messages
436 section_node += title_messages
437 self.document.note_implicit_target(section_node, section_node)
438 # Update state:
439 self.parent = section_node
440
441 def paragraph(self, lines, lineno):
442 """
443 Return a list (paragraph & messages) & a boolean: literal_block next?
444 """
445 data = '\n'.join(lines).rstrip()
446 if re.search(r'(?<!\\)(\\\\)*::$', data):
447 if len(data) == 2:
448 return [], 1
449 elif data[-3] in ' \n':
450 text = data[:-3].rstrip()
451 else:
452 text = data[:-1]
453 literalnext = 1
454 else:
455 text = data
456 literalnext = 0
457 textnodes, messages = self.inline_text(text, lineno)
458 p = nodes.paragraph(data, '', *textnodes)
459 p.source, p.line = self.state_machine.get_source_and_line(lineno)
460 return [p] + messages, literalnext
461
462 def inline_text(self, text, lineno):
463 """
464 Return 2 lists: nodes (text and inline elements), and system_messages.
465 """
466 nodes, messages = self.inliner.parse(text, lineno,
467 self.memo, self.parent)
468 return nodes, messages
469
470 def unindent_warning(self, node_name):
471 # the actual problem is one line below the current line
472 lineno = self.state_machine.abs_line_number() + 1
473 return self.reporter.warning('%s ends without a blank line; '
474 'unexpected unindent.' % node_name,
475 line=lineno)
476
477
478def build_regexp(definition, compile_patterns=True):
479 """
480 Build, compile and return a regular expression based on `definition`.
481
482 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
483 where "parts" is a list of regular expressions and/or regular
484 expression definitions to be joined into an or-group.
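
For example, a flat definition joins its parts into a single named
or-group::

    build_regexp(('marker', '^', ':', ['foo', 'bar']))
    # equivalent to re.compile('^(?P<marker>foo|bar):')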
485 """
486 name, prefix, suffix, parts = definition
487 part_strings = []
488 for part in parts:
489 if isinstance(part, tuple):
490 part_strings.append(build_regexp(part, None))
491 else:
492 part_strings.append(part)
493 or_group = '|'.join(part_strings)
494 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
495 if compile_patterns:
496 return re.compile(regexp)
497 else:
498 return regexp
499
500
501class Inliner:
502
503 """
504 Parse inline markup; call the `parse()` method.
505 """
506
507 def __init__(self) -> None:
508 self.implicit_dispatch = []
509 """List of (pattern, bound method) tuples, used by
510 `self.implicit_inline`."""
511
512 def init_customizations(self, settings) -> None:
513 # lookahead and look-behind expressions for inline markup rules
514 if getattr(settings, 'character_level_inline_markup', False):
515 start_string_prefix = '(^|(?<!\x00))'
516 end_string_suffix = ''
517 else:
518 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
519 (punctuation_chars.openers,
520 punctuation_chars.delimiters))
521 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
522 (punctuation_chars.closing_delimiters,
523 punctuation_chars.delimiters,
524 punctuation_chars.closers))
525 args = locals().copy()
526 args.update(vars(self.__class__))
527
528 parts = ('initial_inline', start_string_prefix, '',
529 [
530 ('start', '', self.non_whitespace_after, # simple start-strings
531 [r'\*\*', # strong
532 r'\*(?!\*)', # emphasis but not strong
533 r'``', # literal
534 r'_`', # inline internal target
535 r'\|(?!\|)'] # substitution reference
536 ),
537 ('whole', '', end_string_suffix, # whole constructs
538 [ # reference name & end-string
539 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
540 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
541 [r'[0-9]+', # manually numbered
542 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)
543 r'\*', # auto-symbol
544 r'(?P<citationlabel>%s)' % self.simplename, # citation ref
545 ]
546 )
547 ]
548 ),
549 ('backquote', # interpreted text or phrase reference
550 '(?P<role>(:%s:)?)' % self.simplename, # optional role
551 self.non_whitespace_after,
552 ['`(?!`)'] # but not literal
553 )
554 ]
555 )
556 self.start_string_prefix = start_string_prefix
557 self.end_string_suffix = end_string_suffix
558 self.parts = parts
559
560 self.patterns = Struct(
561 initial=build_regexp(parts),
562 emphasis=re.compile(self.non_whitespace_escape_before
563 + r'(\*)' + end_string_suffix),
564 strong=re.compile(self.non_whitespace_escape_before
565 + r'(\*\*)' + end_string_suffix),
566 interpreted_or_phrase_ref=re.compile(
567 r"""
568 %(non_unescaped_whitespace_escape_before)s
569 (
570 `
571 (?P<suffix>
572 (?P<role>:%(simplename)s:)?
573 (?P<refend>__?)?
574 )
575 )
576 %(end_string_suffix)s
577 """ % args, re.VERBOSE),
578 embedded_link=re.compile(
579 r"""
580 (
581 (?:[ \n]+|^) # spaces or beginning of line/string
582 < # open bracket
583 %(non_whitespace_after)s
584 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
585 %(non_whitespace_escape_before)s
586 > # close bracket
587 )
588 $ # end of string
589 """ % args, re.VERBOSE),
590 literal=re.compile(self.non_whitespace_before + '(``)'
591 + end_string_suffix),
592 target=re.compile(self.non_whitespace_escape_before
593 + r'(`)' + end_string_suffix),
594 substitution_ref=re.compile(self.non_whitespace_escape_before
595 + r'(\|_{0,2})'
596 + end_string_suffix),
597 email=re.compile(self.email_pattern % args + '$',
598 re.VERBOSE),
599 uri=re.compile(
600 (r"""
601 %(start_string_prefix)s
602 (?P<whole>
603 (?P<absolute> # absolute URI
604 (?P<scheme> # scheme (http, ftp, mailto)
605 [a-zA-Z][a-zA-Z0-9.+-]*
606 )
607 :
608 (
609 ( # either:
610 (//?)? # hierarchical URI
611 %(uric)s* # URI characters
612 %(uri_end)s # final URI char
613 )
614 ( # optional query
615 \?%(uric)s*
616 %(uri_end)s
617 )?
618 ( # optional fragment
619 \#%(uric)s*
620 %(uri_end)s
621 )?
622 )
623 )
624 | # *OR*
625 (?P<email> # email address
626 """ + self.email_pattern + r"""
627 )
628 )
629 %(end_string_suffix)s
630 """) % args, re.VERBOSE),
631 pep=re.compile(
632 r"""
633 %(start_string_prefix)s
634 (
635 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
636 |
637 (PEP\s+(?P<pepnum2>\d+)) # reference by name
638 )
639 %(end_string_suffix)s""" % args, re.VERBOSE),
640 rfc=re.compile(
641 r"""
642 %(start_string_prefix)s
643 (RFC(-|\s+)?(?P<rfcnum>\d+))
644 %(end_string_suffix)s""" % args, re.VERBOSE))
645
646 self.implicit_dispatch.append((self.patterns.uri,
647 self.standalone_uri))
648 if settings.pep_references:
649 self.implicit_dispatch.append((self.patterns.pep,
650 self.pep_reference))
651 if settings.rfc_references:
652 self.implicit_dispatch.append((self.patterns.rfc,
653 self.rfc_reference))
654
655 def parse(self, text, lineno, memo, parent):
656 # Needs to be refactored for nested inline markup.
657 # Add nested_parse() method?
658 """
659 Return 2 lists: nodes (text and inline elements), and system_messages.
660
661 Using `self.patterns.initial`, a pattern which matches start-strings
662 (emphasis, strong, interpreted, phrase reference, literal,
663 substitution reference, and inline target) and complete constructs
664 (simple reference, footnote reference), search for a candidate. When
665 one is found, check for validity (e.g., not a quoted '*' character).
666 If valid, search for the corresponding end string if applicable, and
667 check it for validity. If not found or invalid, generate a warning
668 and ignore the start-string. Implicit inline markup (e.g. standalone
669 URIs) is found last.
670
671 :text: source string
672 :lineno: absolute line number, cf. `statemachine.get_source_and_line()`
673 """
674 self.document = memo.document
675 self.language = memo.language
676 self.reporter = self.document.reporter
677 self.parent = parent
678 pattern_search = self.patterns.initial.search
679 dispatch = self.dispatch
680 remaining = escape2null(text)
681 processed = []
682 unprocessed = []
683 messages = []
684 while remaining:
685 match = pattern_search(remaining)
686 if match:
687 groups = match.groupdict()
688 method = dispatch[groups['start'] or groups['backquote']
689 or groups['refend'] or groups['fnend']]
690 before, inlines, remaining, sysmessages = method(self, match,
691 lineno)
692 unprocessed.append(before)
693 messages += sysmessages
694 if inlines:
695 processed += self.implicit_inline(''.join(unprocessed),
696 lineno)
697 processed += inlines
698 unprocessed = []
699 else:
700 break
701 remaining = ''.join(unprocessed) + remaining
702 if remaining:
703 processed += self.implicit_inline(remaining, lineno)
704 return processed, messages
705
706 # Inline object recognition
707 # -------------------------
708 # See also init_customizations().
709 non_whitespace_before = r'(?<!\s)'
710 non_whitespace_escape_before = r'(?<![\s\x00])'
711 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
712 non_whitespace_after = r'(?!\s)'
713 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
714 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
715 # Valid URI characters (see RFC 2396 & RFC 2732);
716 # final \x00 allows backslash escapes in URIs:
717 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
718 # Delimiter indicating the end of a URI (not part of the URI):
719 uri_end_delim = r"""[>]"""
720 # Last URI character; same as uric but no punctuation:
721 urilast = r"""[_~*/=+a-zA-Z0-9]"""
722 # End of a URI (either 'urilast' or 'uric followed by a
723 # uri_end_delim'):
724 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
725 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
726 email_pattern = r"""
727 %(emailc)s+(?:\.%(emailc)s+)* # name
728 (?<!\x00)@ # at
729 %(emailc)s+(?:\.%(emailc)s*)* # host
730 %(uri_end)s # final URI char
731 """
732
733 def quoted_start(self, match):
734 """Test if inline markup start-string is 'quoted'.
735
736 'Quoted' in this context means the start-string is enclosed in a pair
737 of matching opening/closing delimiters (not necessarily quotes)
738 or at the end of the match.
739 """
740 string = match.string
741 start = match.start()
742 if start == 0: # start-string at beginning of text
743 return False
744 prestart = string[start - 1]
745 try:
746 poststart = string[match.end()]
747 except IndexError: # start-string at end of text
748 return True # not "quoted" but no markup start-string either
749 return punctuation_chars.match_chars(prestart, poststart)
750
751 def inline_obj(self, match, lineno, end_pattern, nodeclass,
752 restore_backslashes=False):
753 string = match.string
754 matchstart = match.start('start')
755 matchend = match.end('start')
756 if self.quoted_start(match):
757 return string[:matchend], [], string[matchend:], [], ''
758 endmatch = end_pattern.search(string[matchend:])
759 if endmatch and endmatch.start(1): # 1 or more chars
760 text = endmatch.string[:endmatch.start(1)]
761 if restore_backslashes:
762 text = unescape(text, True)
763 textend = matchend + endmatch.end(1)
764 rawsource = unescape(string[matchstart:textend], True)
765 node = nodeclass(rawsource, text)
766 return (string[:matchstart], [node],
767 string[textend:], [], endmatch.group(1))
768 msg = self.reporter.warning(
769 'Inline %s start-string without end-string.'
770 % nodeclass.__name__, line=lineno)
771 text = unescape(string[matchstart:matchend], True)
772 prb = self.problematic(text, text, msg)
773 return string[:matchstart], [prb], string[matchend:], [msg], ''
774
775 def problematic(self, text, rawsource, message):
776 msgid = self.document.set_id(message, self.parent)
777 problematic = nodes.problematic(rawsource, text, refid=msgid)
778 prbid = self.document.set_id(problematic)
779 message.add_backref(prbid)
780 return problematic
781
782 def emphasis(self, match, lineno):
783 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
784 match, lineno, self.patterns.emphasis, nodes.emphasis)
785 return before, inlines, remaining, sysmessages
786
787 def strong(self, match, lineno):
788 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
789 match, lineno, self.patterns.strong, nodes.strong)
790 return before, inlines, remaining, sysmessages
791
792 def interpreted_or_phrase_ref(self, match, lineno):
793 end_pattern = self.patterns.interpreted_or_phrase_ref
794 string = match.string
795 matchstart = match.start('backquote')
796 matchend = match.end('backquote')
797 rolestart = match.start('role')
798 role = match.group('role')
799 position = ''
800 if role:
801 role = role[1:-1]
802 position = 'prefix'
803 elif self.quoted_start(match):
804 return string[:matchend], [], string[matchend:], []
805 endmatch = end_pattern.search(string[matchend:])
806 if endmatch and endmatch.start(1): # 1 or more chars
807 textend = matchend + endmatch.end()
808 if endmatch.group('role'):
809 if role:
810 msg = self.reporter.warning(
811 'Multiple roles in interpreted text (both '
812 'prefix and suffix present; only one allowed).',
813 line=lineno)
814 text = unescape(string[rolestart:textend], True)
815 prb = self.problematic(text, text, msg)
816 return string[:rolestart], [prb], string[textend:], [msg]
817 role = endmatch.group('suffix')[1:-1]
818 position = 'suffix'
819 escaped = endmatch.string[:endmatch.start(1)]
820 rawsource = unescape(string[matchstart:textend], True)
821 if rawsource[-1:] == '_':
822 if role:
823 msg = self.reporter.warning(
824 'Mismatch: both interpreted text role %s and '
825 'reference suffix.' % position, line=lineno)
826 text = unescape(string[rolestart:textend], True)
827 prb = self.problematic(text, text, msg)
828 return string[:rolestart], [prb], string[textend:], [msg]
829 return self.phrase_ref(string[:matchstart], string[textend:],
830 rawsource, escaped)
831 else:
832 rawsource = unescape(string[rolestart:textend], True)
833 nodelist, messages = self.interpreted(rawsource, escaped, role,
834 lineno)
835 return (string[:rolestart], nodelist,
836 string[textend:], messages)
837 msg = self.reporter.warning(
838 'Inline interpreted text or phrase reference start-string '
839 'without end-string.', line=lineno)
840 text = unescape(string[matchstart:matchend], True)
841 prb = self.problematic(text, text, msg)
842 return string[:matchstart], [prb], string[matchend:], [msg]
843
844 def phrase_ref(self, before, after, rawsource, escaped, text=None):
845 # `text` is ignored (since 0.16)
846 match = self.patterns.embedded_link.search(escaped)
847 if match: # embedded <URI> or <alias_>
848 text = escaped[:match.start(0)]
849 unescaped = unescape(text)
850 rawtext = unescape(text, True)
851 aliastext = match.group(2)
852 rawaliastext = unescape(aliastext, True)
853 underscore_escaped = rawaliastext.endswith(r'\_')
854 if (aliastext.endswith('_')
855 and not (underscore_escaped
856 or self.patterns.uri.match(aliastext))):
857 aliastype = 'name'
858 alias = normalize_name(unescape(aliastext[:-1]))
859 target = nodes.target(match.group(1), refname=alias)
860 target.indirect_reference_name = whitespace_normalize_name(
861 unescape(aliastext[:-1]))
862 else:
863 aliastype = 'uri'
864 # remove unescaped whitespace
865 alias_parts = split_escaped_whitespace(match.group(2))
866 alias = ' '.join(''.join(part.split())
867 for part in alias_parts)
868 alias = self.adjust_uri(unescape(alias))
869 if alias.endswith(r'\_'):
870 alias = alias[:-2] + '_'
871 target = nodes.target(match.group(1), refuri=alias)
872 target.referenced = 1
873 if not aliastext:
874 raise ApplicationError('problem with embedded link: %r'
875 % aliastext)
876 if not text:
877 text = alias
878 unescaped = unescape(text)
879 rawtext = rawaliastext
880 else:
881 text = escaped
882 unescaped = unescape(text)
883 target = None
884 rawtext = unescape(escaped, True)
885
886 refname = normalize_name(unescaped)
887 reference = nodes.reference(rawsource, text,
888 name=whitespace_normalize_name(unescaped))
889 reference[0].rawsource = rawtext
890
891 node_list = [reference]
892
893 if rawsource[-2:] == '__':
894 if target and (aliastype == 'name'):
895 reference['refname'] = alias
896 self.document.note_refname(reference)
897 # self.document.note_indirect_target(target) # required?
898 elif target and (aliastype == 'uri'):
899 reference['refuri'] = alias
900 else:
901 reference['anonymous'] = True
902 else:
903 if target:
904 target['names'].append(refname)
905 if aliastype == 'name':
906 reference['refname'] = alias
907 self.document.note_indirect_target(target)
908 self.document.note_refname(reference)
909 else:
910 reference['refuri'] = alias
911 # target.note_referenced_by(name=refname)
912 self.document.note_implicit_target(target, self.parent)
913 node_list.append(target)
914 else:
915 reference['refname'] = refname
916 self.document.note_refname(reference)
917 return before, node_list, after, []
918
919 def adjust_uri(self, uri):
920 match = self.patterns.email.match(uri)
921 if match:
922 return 'mailto:' + uri
923 else:
924 return uri
925
926 def interpreted(self, rawsource, text, role, lineno):
927 role_fn, messages = roles.role(role, self.language, lineno,
928 self.reporter)
929 if role_fn:
930 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
931 return nodes, messages + messages2
932 else:
933 msg = self.reporter.error(
934 'Unknown interpreted text role "%s".' % role,
935 line=lineno)
936 return ([self.problematic(rawsource, rawsource, msg)],
937 messages + [msg])
938
939 def literal(self, match, lineno):
940 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
941 match, lineno, self.patterns.literal, nodes.literal,
942 restore_backslashes=True)
943 return before, inlines, remaining, sysmessages
944
945 def inline_internal_target(self, match, lineno):
946 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
947 match, lineno, self.patterns.target, nodes.target)
948 if inlines and isinstance(inlines[0], nodes.target):
949 assert len(inlines) == 1
950 target = inlines[0]
951 name = normalize_name(target.astext())
952 target['names'].append(name)
953 self.document.note_explicit_target(target, self.parent)
954 return before, inlines, remaining, sysmessages
955
956 def substitution_reference(self, match, lineno):
957 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
958 match, lineno, self.patterns.substitution_ref,
959 nodes.substitution_reference)
960 if len(inlines) == 1:
961 subref_node = inlines[0]
962 if isinstance(subref_node, nodes.substitution_reference):
963 subref_text = subref_node.astext()
964 self.document.note_substitution_ref(subref_node, subref_text)
965 if endstring[-1:] == '_':
966 reference_node = nodes.reference(
967 '|%s%s' % (subref_text, endstring), '')
968 if endstring[-2:] == '__':
969 reference_node['anonymous'] = True
970 else:
971 reference_node['refname'] = normalize_name(subref_text)
972 self.document.note_refname(reference_node)
973 reference_node += subref_node
974 inlines = [reference_node]
975 return before, inlines, remaining, sysmessages
976
977 def footnote_reference(self, match, lineno):
978 """
979 Handles `nodes.footnote_reference` and `nodes.citation_reference`
980 elements.
981 """
982 label = match.group('footnotelabel')
983 refname = normalize_name(label)
984 string = match.string
985 before = string[:match.start('whole')]
986 remaining = string[match.end('whole'):]
987 if match.group('citationlabel'):
988 refnode = nodes.citation_reference('[%s]_' % label,
989 refname=refname)
990 refnode += nodes.Text(label)
991 self.document.note_citation_ref(refnode)
992 else:
993 refnode = nodes.footnote_reference('[%s]_' % label)
994 if refname[0] == '#':
995 refname = refname[1:]
996 refnode['auto'] = 1
997 self.document.note_autofootnote_ref(refnode)
998 elif refname == '*':
999 refname = ''
1000 refnode['auto'] = '*'
1001 self.document.note_symbol_footnote_ref(
1002 refnode)
1003 else:
1004 refnode += nodes.Text(label)
1005 if refname:
1006 refnode['refname'] = refname
1007 self.document.note_footnote_ref(refnode)
1008 if utils.get_trim_footnote_ref_space(self.document.settings):
1009 before = before.rstrip()
1010 return before, [refnode], remaining, []
1011
1012 def reference(self, match, lineno, anonymous=False):
1013 referencename = match.group('refname')
1014 refname = normalize_name(referencename)
1015 referencenode = nodes.reference(
1016 referencename + match.group('refend'), referencename,
1017 name=whitespace_normalize_name(referencename))
1018 referencenode[0].rawsource = referencename
1019 if anonymous:
1020 referencenode['anonymous'] = True
1021 else:
1022 referencenode['refname'] = refname
1023 self.document.note_refname(referencenode)
1024 string = match.string
1025 matchstart = match.start('whole')
1026 matchend = match.end('whole')
1027 return string[:matchstart], [referencenode], string[matchend:], []
1028
1029 def anonymous_reference(self, match, lineno):
1030 return self.reference(match, lineno, anonymous=True)
1031
1032 def standalone_uri(self, match, lineno):
1033 if (not match.group('scheme')
1034 or match.group('scheme').lower() in urischemes.schemes):
1035 if match.group('email'):
1036 addscheme = 'mailto:'
1037 else:
1038 addscheme = ''
1039 text = match.group('whole')
1040 refuri = addscheme + unescape(text)
1041 reference = nodes.reference(unescape(text, True), text,
1042 refuri=refuri)
1043 return [reference]
1044 else: # not a valid scheme
1045 raise MarkupMismatch
1046
1047 def pep_reference(self, match, lineno):
1048 text = match.group(0)
1049 if text.startswith('pep-'):
1050 pepnum = int(unescape(match.group('pepnum1')))
1051 elif text.startswith('PEP'):
1052 pepnum = int(unescape(match.group('pepnum2')))
1053 else:
1054 raise MarkupMismatch
1055 ref = (self.document.settings.pep_base_url
1056 + self.document.settings.pep_file_url_template % pepnum)
1057 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1058
1059 rfc_url = 'rfc%d.html'
1060
1061 def rfc_reference(self, match, lineno):
1062 text = match.group(0)
1063 if text.startswith('RFC'):
1064 rfcnum = int(unescape(match.group('rfcnum')))
1065 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
1066 else:
1067 raise MarkupMismatch
1068 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1069
1070 def implicit_inline(self, text, lineno):
1071 """
1072 Check each of the patterns in `self.implicit_dispatch` for a match,
1073 and dispatch to the stored method for the pattern. Recursively check
1074 the text before and after the match. Return a list of `nodes.Text`
1075 and inline element nodes.
1076 """
1077 if not text:
1078 return []
1079 for pattern, method in self.implicit_dispatch:
1080 match = pattern.search(text)
1081 if match:
1082 try:
1083 # Must recurse on strings before *and* after the match;
1084 # there may be multiple patterns.
1085 return (self.implicit_inline(text[:match.start()], lineno)
1086 + method(match, lineno)
1087 + self.implicit_inline(text[match.end():], lineno))
1088 except MarkupMismatch:
1089 pass
1090 return [nodes.Text(text)]
1091
1092 dispatch = {'*': emphasis,
1093 '**': strong,
1094 '`': interpreted_or_phrase_ref,
1095 '``': literal,
1096 '_`': inline_internal_target,
1097 ']_': footnote_reference,
1098 '|': substitution_reference,
1099 '_': reference,
1100 '__': anonymous_reference}
1101
1102
1103def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1104 return ord(s) - _zero
1105
1106
1107def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1108 return ord(s) - _zero
1109
1110
1111class Body(RSTState):
1112
1113 """
1114 Generic classifier of the first line of a block.
1115 """
1116
1117 double_width_pad_char = tableparser.TableParser.double_width_pad_char
1118 """Padding character for East Asian double-width text."""
1119
1120 enum = Struct()
1121 """Enumerated list parsing information."""
1122
1123 enum.formatinfo = {
1124 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1125 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1126 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1127 enum.formats = enum.formatinfo.keys()
1128 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1129 'lowerroman', 'upperroman'] # ORDERED!
1130 enum.sequencepats = {'arabic': '[0-9]+',
1131 'loweralpha': '[a-z]',
1132 'upperalpha': '[A-Z]',
1133 'lowerroman': '[ivxlcdm]+',
1134 'upperroman': '[IVXLCDM]+'}
1135 enum.converters = {'arabic': int,
1136 'loweralpha': _loweralpha_to_int,
1137 'upperalpha': _upperalpha_to_int,
1138 'lowerroman': RomanNumeral.from_string,
1139 'upperroman': RomanNumeral.from_string}
1140
1141 enum.sequenceregexps = {}
1142 for sequence in enum.sequences:
1143 enum.sequenceregexps[sequence] = re.compile(
1144 enum.sequencepats[sequence] + '$')
1145
1146 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1147 """Matches the top (& bottom) of a full table)."""
1148
1149 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1150 """Matches the top of a simple table."""
1151
1152 simple_table_border_pat = re.compile('=+[ =]*$')
1153 """Matches the bottom & header bottom of a simple table."""
1154
1155 pats = {}
1156 """Fragments of patterns used by transitions."""
1157
1158 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1159 pats['alpha'] = '[a-zA-Z]'
1160 pats['alphanum'] = '[a-zA-Z0-9]'
1161 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1162 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1163 '|%(upperroman)s|#)' % enum.sequencepats)
1164 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1165 # @@@ Loosen up the pattern? Allow Unicode?
1166 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1167 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1168 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1169 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1170
1171 for format in enum.formats:
1172 pats[format] = '(?P<%s>%s%s%s)' % (
1173 format, re.escape(enum.formatinfo[format].prefix),
1174 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1175
1176 patterns = {
1177 'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
1178 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1179 'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
1180 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1181 'doctest': r'>>>( +|$)',
1182 'line_block': r'\|( +|$)',
1183 'grid_table_top': grid_table_top_pat,
1184 'simple_table_top': simple_table_top_pat,
1185 'explicit_markup': r'\.\.( +|$)',
1186 'anonymous': r'__( +|$)',
1187 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1188 'text': r''}
1189 initial_transitions = (
1190 'bullet',
1191 'enumerator',
1192 'field_marker',
1193 'option_marker',
1194 'doctest',
1195 'line_block',
1196 'grid_table_top',
1197 'simple_table_top',
1198 'explicit_markup',
1199 'anonymous',
1200 'line',
1201 'text')
1202
1203 def indent(self, match, context, next_state):
1204 """Block quote."""
1205 (indented, indent, line_offset, blank_finish
1206 ) = self.state_machine.get_indented()
1207 elements = self.block_quote(indented, line_offset)
1208 self.parent += elements
1209 if not blank_finish:
1210 self.parent += self.unindent_warning('Block quote')
1211 return context, next_state, []
1212
1213 def block_quote(self, indented, line_offset):
1214 elements = []
1215 while indented:
1216 blockquote = nodes.block_quote(rawsource='\n'.join(indented))
1217 (blockquote.source, blockquote.line
1218 ) = self.state_machine.get_source_and_line(line_offset+1)
1219 (blockquote_lines,
1220 attribution_lines,
1221 attribution_offset,
1222 indented,
1223 new_line_offset) = self.split_attribution(indented, line_offset)
1224 self.nested_parse(blockquote_lines, line_offset, blockquote)
1225 elements.append(blockquote)
1226 if attribution_lines:
1227 attribution, messages = self.parse_attribution(
1228 attribution_lines, line_offset+attribution_offset)
1229 blockquote += attribution
1230 elements += messages
1231 line_offset = new_line_offset
1232 while indented and not indented[0]:
1233 indented = indented[1:]
1234 line_offset += 1
1235 return elements
1236
1237 # U+2014 is an em-dash:
1238 attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
1239
1240 def split_attribution(self, indented, line_offset):
1241 """
1242 Check for a block quote attribution and split it off:
1243
1244 * First line after a blank line must begin with a dash ("--", "---",
1245 em-dash; matches `self.attribution_pattern`).
1246 * Every line after that must have consistent indentation.
1247 * Attributions must be preceded by block quote content.
1248
1249 Return a tuple of: (block quote content lines, attribution lines,
1250 attribution offset, remaining indented lines, remaining lines offset).
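
For example, in the indented block::

    A quotation, provided as block quote content.

    -- Attribution Name

the last line is split off as the attribution.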
1251 """
1252 blank = None
1253 nonblank_seen = False
1254 for i in range(len(indented)):
1255 line = indented[i].rstrip()
1256 if line:
1257 if nonblank_seen and blank == i - 1: # last line blank
1258 match = self.attribution_pattern.match(line)
1259 if match:
1260 attribution_end, indent = self.check_attribution(
1261 indented, i)
1262 if attribution_end:
1263 a_lines = indented[i:attribution_end]
1264 a_lines.trim_left(match.end(), end=1)
1265 a_lines.trim_left(indent, start=1)
1266 return (indented[:i], a_lines,
1267 i, indented[attribution_end:],
1268 line_offset + attribution_end)
1269 nonblank_seen = True
1270 else:
1271 blank = i
1272 else:
1273 return indented, None, None, None, None
1274
1275 def check_attribution(self, indented, attribution_start):
1276 """
1277 Check attribution shape.
1278 Return the index past the end of the attribution, and the indent.
1279 """
1280 indent = None
1281 i = attribution_start + 1
1282 for i in range(attribution_start + 1, len(indented)):
1283 line = indented[i].rstrip()
1284 if not line:
1285 break
1286 if indent is None:
1287 indent = len(line) - len(line.lstrip())
1288 elif len(line) - len(line.lstrip()) != indent:
1289 return None, None # bad shape; not an attribution
1290 else:
1291 # return index of line after last attribution line:
1292 i += 1
1293 return i, (indent or 0)
1294
1295 def parse_attribution(self, indented, line_offset):
1296 text = '\n'.join(indented).rstrip()
1297 lineno = 1 + line_offset # line_offset is zero-based
1298 textnodes, messages = self.inline_text(text, lineno)
1299 node = nodes.attribution(text, '', *textnodes)
1300 node.source, node.line = self.state_machine.get_source_and_line(lineno)
1301 return node, messages
1302
1303 def bullet(self, match, context, next_state):
1304 """Bullet list item."""
1305 ul = nodes.bullet_list()
1306 ul.source, ul.line = self.state_machine.get_source_and_line()
1307 self.parent += ul
1308 ul['bullet'] = match.string[0]
1309 i, blank_finish = self.list_item(match.end())
1310 ul += i
1311 offset = self.state_machine.line_offset + 1 # next line
1312 new_line_offset, blank_finish = self.nested_list_parse(
1313 self.state_machine.input_lines[offset:],
1314 input_offset=self.state_machine.abs_line_offset() + 1,
1315 node=ul, initial_state='BulletList',
1316 blank_finish=blank_finish)
1317 self.goto_line(new_line_offset)
1318 if not blank_finish:
1319 self.parent += self.unindent_warning('Bullet list')
1320 return [], next_state, []
1321
1322 def list_item(self, indent):
1323 src, srcline = self.state_machine.get_source_and_line()
1324 if self.state_machine.line[indent:]:
1325 indented, line_offset, blank_finish = (
1326 self.state_machine.get_known_indented(indent))
1327 else:
1328 indented, indent, line_offset, blank_finish = (
1329 self.state_machine.get_first_known_indented(indent))
1330 listitem = nodes.list_item('\n'.join(indented))
1331 listitem.source, listitem.line = src, srcline
1332 if indented:
1333 self.nested_parse(indented, input_offset=line_offset,
1334 node=listitem)
1335 return listitem, blank_finish
1336
1337 def enumerator(self, match, context, next_state):
1338 """Enumerated List Item"""
1339 format, sequence, text, ordinal = self.parse_enumerator(match)
1340 if not self.is_enumerated_list_item(ordinal, sequence, format):
1341 raise statemachine.TransitionCorrection('text')
1342 enumlist = nodes.enumerated_list()
1343 (enumlist.source,
1344 enumlist.line) = self.state_machine.get_source_and_line()
1345 self.parent += enumlist
1346 if sequence == '#':
1347 enumlist['enumtype'] = 'arabic'
1348 else:
1349 enumlist['enumtype'] = sequence
1350 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1351 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1352 if ordinal != 1:
1353 enumlist['start'] = ordinal
1354 msg = self.reporter.info(
1355 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1356 % (text, ordinal), base_node=enumlist)
1357 self.parent += msg
1358 listitem, blank_finish = self.list_item(match.end())
1359 enumlist += listitem
1360 offset = self.state_machine.line_offset + 1 # next line
1361 newline_offset, blank_finish = self.nested_list_parse(
1362 self.state_machine.input_lines[offset:],
1363 input_offset=self.state_machine.abs_line_offset() + 1,
1364 node=enumlist, initial_state='EnumeratedList',
1365 blank_finish=blank_finish,
1366 extra_settings={'lastordinal': ordinal,
1367 'format': format,
1368 'auto': sequence == '#'})
1369 self.goto_line(newline_offset)
1370 if not blank_finish:
1371 self.parent += self.unindent_warning('Enumerated list')
1372 return [], next_state, []
1373
1374 def parse_enumerator(self, match, expected_sequence=None):
1375 """
1376 Analyze an enumerator and return the results.
1377
1378 :Return:
1379 - the enumerator format ('period', 'parens', or 'rparen'),
1380 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1381 - the text of the enumerator, stripped of formatting, and
1382 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1383 ``None`` is returned for invalid enumerator text).
1384
1385 The enumerator format has already been determined by the regular
1386 expression match. If `expected_sequence` is given, that sequence is
1387 tried first. If not, we check for Roman numeral 1. This way,
1388 single-character Roman numerals (which are also alphabetical) can be
1389 matched. If no sequence has been matched, all sequences are checked in
1390 order.
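
For example, a match of the enumerator ``3.`` yields
``('period', 'arabic', '3', 3)``, and a match of ``(a)`` yields
``('parens', 'loweralpha', 'a', 1)``.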
1391 """
1392 groupdict = match.groupdict()
1393 sequence = ''
1394 for format in self.enum.formats:
1395 if groupdict[format]: # was this the format matched?
1396 break # yes; keep `format`
1397 else: # shouldn't happen
1398 raise ParserError('enumerator format not matched')
1399 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1400 : self.enum.formatinfo[format].end]
1401 if text == '#':
1402 sequence = '#'
1403 elif expected_sequence:
1404 try:
1405 if self.enum.sequenceregexps[expected_sequence].match(text):
1406 sequence = expected_sequence
1407 except KeyError: # shouldn't happen
1408 raise ParserError('unknown enumerator sequence: %s'
1409 % sequence)
1410 elif text == 'i':
1411 sequence = 'lowerroman'
1412 elif text == 'I':
1413 sequence = 'upperroman'
1414 if not sequence:
1415 for sequence in self.enum.sequences:
1416 if self.enum.sequenceregexps[sequence].match(text):
1417 break
1418 else: # shouldn't happen
1419 raise ParserError('enumerator sequence not matched')
1420 if sequence == '#':
1421 ordinal = 1
1422 else:
1423 try:
1424 ordinal = int(self.enum.converters[sequence](text))
1425 except InvalidRomanNumeralError:
1426 ordinal = None
1427 return format, sequence, text, ordinal
1428
1429 def is_enumerated_list_item(self, ordinal, sequence, format):
1430 """
1431 Check validity based on the ordinal value and the second line.
1432
1433 Return true if the ordinal is valid and the second line is blank,
1434 indented, or starts with the next enumerator or an auto-enumerator.
1435 """
1436 if ordinal is None:
1437 return None
1438 try:
1439 next_line = self.state_machine.next_line()
1440 except EOFError: # end of input lines
1441 self.state_machine.previous_line()
1442 return 1
1443 else:
1444 self.state_machine.previous_line()
1445 if not next_line[:1].strip(): # blank or indented
1446 return 1
1447 result = self.make_enumerator(ordinal + 1, sequence, format)
1448 if result:
1449 next_enumerator, auto_enumerator = result
1450 try:
1451 if next_line.startswith((next_enumerator, auto_enumerator)):
1452 return 1
1453 except TypeError:
1454 pass
1455 return None
1456
1457 def make_enumerator(self, ordinal, sequence, format):
1458 """
1459 Construct and return the next enumerated list item marker, and an
1460 auto-enumerator ("#" instead of the regular enumerator).
1461
1462 Return ``None`` for invalid (out of range) ordinals.
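
For example, ``make_enumerator(3, 'loweralpha', 'parens')`` returns
``('(c) ', '(#) ')``.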
1463 """
1464 if sequence == '#':
1465 enumerator = '#'
1466 elif sequence == 'arabic':
1467 enumerator = str(ordinal)
1468 else:
1469 if sequence.endswith('alpha'):
1470 if ordinal > 26:
1471 return None
1472 enumerator = chr(ordinal + ord('a') - 1)
1473 elif sequence.endswith('roman'):
1474 try:
1475 enumerator = RomanNumeral(ordinal).to_uppercase()
1476 except TypeError:
1477 return None
1478 else: # shouldn't happen
1479 raise ParserError('unknown enumerator sequence: "%s"'
1480 % sequence)
1481 if sequence.startswith('lower'):
1482 enumerator = enumerator.lower()
1483 elif sequence.startswith('upper'):
1484 enumerator = enumerator.upper()
1485 else: # shouldn't happen
1486 raise ParserError('unknown enumerator sequence: "%s"'
1487 % sequence)
1488 formatinfo = self.enum.formatinfo[format]
1489 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1490 + ' ')
1491 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1492 return next_enumerator, auto_enumerator
1493
1494 def field_marker(self, match, context, next_state):
1495 """Field list item."""
1496 field_list = nodes.field_list()
1497 self.parent += field_list
1498 field, blank_finish = self.field(match)
1499 field_list += field
1500 offset = self.state_machine.line_offset + 1 # next line
1501 newline_offset, blank_finish = self.nested_list_parse(
1502 self.state_machine.input_lines[offset:],
1503 input_offset=self.state_machine.abs_line_offset() + 1,
1504 node=field_list, initial_state='FieldList',
1505 blank_finish=blank_finish)
1506 self.goto_line(newline_offset)
1507 if not blank_finish:
1508 self.parent += self.unindent_warning('Field list')
1509 return [], next_state, []
1510
1511 def field(self, match):
1512 name = self.parse_field_marker(match)
1513 src, srcline = self.state_machine.get_source_and_line()
1514 lineno = self.state_machine.abs_line_number()
1515 (indented, indent, line_offset, blank_finish
1516 ) = self.state_machine.get_first_known_indented(match.end())
1517 field_node = nodes.field()
1518 field_node.source = src
1519 field_node.line = srcline
1520 name_nodes, name_messages = self.inline_text(name, lineno)
1521 field_node += nodes.field_name(name, '', *name_nodes)
1522 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1523 field_node += field_body
1524 if indented:
1525 self.parse_field_body(indented, line_offset, field_body)
1526 return field_node, blank_finish
1527
1528 def parse_field_marker(self, match):
1529 """Extract & return field name from a field marker match."""
1530 field = match.group()[1:] # strip off leading ':'
1531 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1532 return field
1533
1534 def parse_field_body(self, indented, offset, node) -> None:
1535 self.nested_parse(indented, input_offset=offset, node=node)
1536
1537 def option_marker(self, match, context, next_state):
1538 """Option list item."""
1539 optionlist = nodes.option_list()
1540 (optionlist.source, optionlist.line
1541 ) = self.state_machine.get_source_and_line()
1542 try:
1543 listitem, blank_finish = self.option_list_item(match)
1544 except MarkupError as error:
1545 # This shouldn't happen; pattern won't match.
1546 msg = self.reporter.error('Invalid option list marker: %s'
1547 % error)
1548 self.parent += msg
1549 (indented, indent, line_offset, blank_finish
1550 ) = self.state_machine.get_first_known_indented(match.end())
1551 elements = self.block_quote(indented, line_offset)
1552 self.parent += elements
1553 if not blank_finish:
1554 self.parent += self.unindent_warning('Option list')
1555 return [], next_state, []
1556 self.parent += optionlist
1557 optionlist += listitem
1558 offset = self.state_machine.line_offset + 1 # next line
1559 newline_offset, blank_finish = self.nested_list_parse(
1560 self.state_machine.input_lines[offset:],
1561 input_offset=self.state_machine.abs_line_offset() + 1,
1562 node=optionlist, initial_state='OptionList',
1563 blank_finish=blank_finish)
1564 self.goto_line(newline_offset)
1565 if not blank_finish:
1566 self.parent += self.unindent_warning('Option list')
1567 return [], next_state, []
1568
1569 def option_list_item(self, match):
1570 offset = self.state_machine.abs_line_offset()
1571 options = self.parse_option_marker(match)
1572 (indented, indent, line_offset, blank_finish
1573 ) = self.state_machine.get_first_known_indented(match.end())
1574 if not indented: # not an option list item
1575 self.goto_line(offset)
1576 raise statemachine.TransitionCorrection('text')
1577 option_group = nodes.option_group('', *options)
1578 description = nodes.description('\n'.join(indented))
1579 option_list_item = nodes.option_list_item('', option_group,
1580 description)
1581 if indented:
1582 self.nested_parse(indented, input_offset=line_offset,
1583 node=description)
1584 return option_list_item, blank_finish
1585
1586 def parse_option_marker(self, match):
1587 """
1588 Return a list of `nodes.option` and `nodes.option_argument` objects,
1589 parsed from an option marker match.
1590
1591 :Exception: `MarkupError` for invalid option markers.
1592 """
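# Hedged example (the option text is hypothetical): the marker
# "-f FILE, --file=FILE" is split on ", " into two option strings; the
# first yields option_string "-f" with option_argument "FILE"
# (delimiter " "), the second yields option_string "--file" with
# option_argument "FILE" (delimiter "=").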
1593 optlist = []
1594 # split at ", ", except inside < > (complex arguments)
1595 optionstrings = re.split(r', (?![^<]*>)', match.group().rstrip())
1596 for optionstring in optionstrings:
1597 tokens = optionstring.split()
1598 delimiter = ' '
1599 firstopt = tokens[0].split('=', 1)
1600 if len(firstopt) > 1:
1601 # "--opt=value" form
1602 tokens[:1] = firstopt
1603 delimiter = '='
1604 elif (len(tokens[0]) > 2
1605 and ((tokens[0].startswith('-')
1606 and not tokens[0].startswith('--'))
1607 or tokens[0].startswith('+'))):
1608 # "-ovalue" form
1609 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1610 delimiter = ''
1611 if len(tokens) > 1 and (tokens[1].startswith('<')
1612 and tokens[-1].endswith('>')):
1613 # "-o <value1 value2>" form; join all values into one token
1614 tokens[1:] = [' '.join(tokens[1:])]
1615 if 0 < len(tokens) <= 2:
1616 option = nodes.option(optionstring)
1617 option += nodes.option_string(tokens[0], tokens[0])
1618 if len(tokens) > 1:
1619 option += nodes.option_argument(tokens[1], tokens[1],
1620 delimiter=delimiter)
1621 optlist.append(option)
1622 else:
1623 raise MarkupError(
1624 'wrong number of option tokens (=%s), should be 1 or 2: '
1625 '"%s"' % (len(tokens), optionstring))
1626 return optlist
1627
1628 def doctest(self, match, context, next_state):
1629 line = self.document.current_line
1630 data = '\n'.join(self.state_machine.get_text_block())
1631 # TODO: Parse with `directives.body.CodeBlock` with
1632 # argument 'pycon' (Python Console) in Docutils 1.0.
1633 n = nodes.doctest_block(data, data)
1634 n.line = line
1635 self.parent += n
1636 return [], next_state, []
1637
1638 def line_block(self, match, context, next_state):
1639 """First line of a line block."""
1640 block = nodes.line_block()
1641 self.parent += block
1642 lineno = self.state_machine.abs_line_number()
1643 (block.source,
1644 block.line) = self.state_machine.get_source_and_line(lineno)
1645 line, messages, blank_finish = self.line_block_line(match, lineno)
1646 block += line
1647 self.parent += messages
1648 if not blank_finish:
1649 offset = self.state_machine.line_offset + 1 # next line
1650 new_line_offset, blank_finish = self.nested_list_parse(
1651 self.state_machine.input_lines[offset:],
1652 input_offset=self.state_machine.abs_line_offset() + 1,
1653 node=block, initial_state='LineBlock',
1654 blank_finish=False)
1655 self.goto_line(new_line_offset)
1656 if not blank_finish:
1657 self.parent += self.reporter.warning(
1658 'Line block ends without a blank line.',
1659 line=lineno+1)
1660 if len(block):
1661 if block[0].indent is None:
1662 block[0].indent = 0
1663 self.nest_line_block_lines(block)
1664 return [], next_state, []
1665
1666 def line_block_line(self, match, lineno):
1667 """Return one line element of a line_block."""
1668 (indented, indent, line_offset, blank_finish
1669 ) = self.state_machine.get_first_known_indented(match.end(),
1670 until_blank=True)
1671 text = '\n'.join(indented)
1672 text_nodes, messages = self.inline_text(text, lineno)
1673 line = nodes.line(text, '', *text_nodes)
1674 (line.source,
1675 line.line) = self.state_machine.get_source_and_line(lineno)
1676 if match.string.rstrip() != '|': # not empty
1677 line.indent = len(match.group(1)) - 1
1678 return line, messages, blank_finish
1679
1680 def nest_line_block_lines(self, block) -> None:
1681 for index in range(1, len(block)):
1682 if block[index].indent is None:
1683 block[index].indent = block[index - 1].indent
1684 self.nest_line_block_segment(block)
1685
1686 def nest_line_block_segment(self, block) -> None:
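# Walk-through (illustrative): for child indents [0, 2, 2, 0], the two
# indent-2 lines are collected into a nested line_block, recursively
# processed, and inserted between the indent-0 lines, giving
# line, line_block(line, line), line.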
1687 indents = [item.indent for item in block]
1688 least = min(indents)
1689 new_items = []
1690 new_block = nodes.line_block()
1691 for item in block:
1692 if item.indent > least:
1693 new_block.append(item)
1694 else:
1695 if len(new_block):
1696 self.nest_line_block_segment(new_block)
1697 new_items.append(new_block)
1698 new_block = nodes.line_block()
1699 new_items.append(item)
1700 if len(new_block):
1701 self.nest_line_block_segment(new_block)
1702 new_items.append(new_block)
1703 block[:] = new_items
1704
1705 def grid_table_top(self, match, context, next_state):
1706 """Top border of a full table."""
1707 return self.table_top(match, context, next_state,
1708 self.isolate_grid_table,
1709 tableparser.GridTableParser)
1710
1711 def simple_table_top(self, match, context, next_state):
1712 """Top border of a simple table."""
1713 return self.table_top(match, context, next_state,
1714 self.isolate_simple_table,
1715 tableparser.SimpleTableParser)
1716
1717 def table_top(self, match, context, next_state,
1718 isolate_function, parser_class):
1719 """Top border of a generic table."""
1720 nodelist, blank_finish = self.table(isolate_function, parser_class)
1721 self.parent += nodelist
1722 if not blank_finish:
1723 msg = self.reporter.warning(
1724 'Blank line required after table.',
1725 line=self.state_machine.abs_line_number()+1)
1726 self.parent += msg
1727 return [], next_state, []
1728
1729 def table(self, isolate_function, parser_class):
1730 """Parse a table."""
1731 block, messages, blank_finish = isolate_function()
1732 if block:
1733 try:
1734 parser = parser_class()
1735 tabledata = parser.parse(block)
1736 tableline = (self.state_machine.abs_line_number() - len(block)
1737 + 1)
1738 table = self.build_table(tabledata, tableline)
1739 nodelist = [table] + messages
1740 except tableparser.TableMarkupError as err:
1741 nodelist = self.malformed_table(block, ' '.join(err.args),
1742 offset=err.offset) + messages
1743 else:
1744 nodelist = messages
1745 return nodelist, blank_finish
1746
1747 def isolate_grid_table(self):
1748 messages = []
1749 blank_finish = True
1750 try:
1751 block = self.state_machine.get_text_block(flush_left=True)
1752 except statemachine.UnexpectedIndentationError as err:
1753 block, src, srcline = err.args
1754 messages.append(self.reporter.error('Unexpected indentation.',
1755 source=src, line=srcline))
1756 blank_finish = False
1757 block.disconnect()
1758 # for East Asian chars:
1759 block.pad_double_width(self.double_width_pad_char)
1760 width = len(block[0].strip())
1761 for i in range(len(block)):
1762 block[i] = block[i].strip()
1763 if block[i][0] not in '+|': # check left edge
1764 blank_finish = False
1765 self.state_machine.previous_line(len(block) - i)
1766 del block[i:]
1767 break
1768 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1769 # from second-last to third line of table:
1770 for i in range(len(block) - 2, 1, -1):
1771 if self.grid_table_top_pat.match(block[i]):
1772 self.state_machine.previous_line(len(block) - i + 1)
1773 del block[i+1:]
1774 blank_finish = False
1775 break
1776 else:
1777 detail = 'Bottom border missing or corrupt.'
1778 messages.extend(self.malformed_table(block, detail, i))
1779 return [], messages, blank_finish
1780 for i in range(len(block)): # check right edge
1781 if len(block[i]) != width or block[i][-1] not in '+|':
1782 detail = 'Right border not aligned or missing.'
1783 messages.extend(self.malformed_table(block, detail, i))
1784 return [], messages, blank_finish
1785 return block, messages, blank_finish
1786
1787 def isolate_simple_table(self):
1788 start = self.state_machine.line_offset
1789 lines = self.state_machine.input_lines
1790 limit = len(lines) - 1
1791 toplen = len(lines[start].strip())
1792 pattern_match = self.simple_table_border_pat.match
1793 found = 0
1794 found_at = None
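# Scan forward for border lines: the second border found, or a border
# followed by a blank line or the end of input, marks the table bottom.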
1795 i = start + 1
1796 while i <= limit:
1797 line = lines[i]
1798 match = pattern_match(line)
1799 if match:
1800 if len(line.strip()) != toplen:
1801 self.state_machine.next_line(i - start)
1802 messages = self.malformed_table(
1803 lines[start:i+1], 'Bottom border or header rule does '
1804 'not match top border.', i-start)
1805 return [], messages, i == limit or not lines[i+1].strip()
1806 found += 1
1807 found_at = i
1808 if found == 2 or i == limit or not lines[i+1].strip():
1809 end = i
1810 break
1811 i += 1
1812 else: # reached end of input_lines
1813 details = 'No bottom table border found'
1814 if found:
1815 details += ' or no blank line after table bottom'
1816 self.state_machine.next_line(found_at - start)
1817 block = lines[start:found_at+1]
1818 else:
1819 self.state_machine.next_line(i - start - 1)
1820 block = lines[start:]
1821 messages = self.malformed_table(block, details + '.')
1822 return [], messages, not found
1823 self.state_machine.next_line(end - start)
1824 block = lines[start:end+1]
1825 # for East Asian chars:
1826 block.pad_double_width(self.double_width_pad_char)
1827 return block, [], end == limit or not lines[end+1].strip()
1828
1829 def malformed_table(self, block, detail='', offset=0):
1830 block.replace(self.double_width_pad_char, '')
1831 data = '\n'.join(block)
1832 message = 'Malformed table.'
1833 startline = self.state_machine.abs_line_number() - len(block) + 1
1834 if detail:
1835 message += '\n' + detail
1836 error = self.reporter.error(message, nodes.literal_block(data, data),
1837 line=startline+offset)
1838 return [error]
1839
1840 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
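# tabledata is the 3-tuple produced by the table parser (see self.table()
# above): (column widths, header rows, body rows), where each row is a
# list of cells and each cell is either None (spanned away) or a
# (morerows, morecols, offset, cellblock) tuple as consumed by
# build_table_row() below.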
1841 colwidths, headrows, bodyrows = tabledata
1842 table = nodes.table()
1843 if widths == 'auto':
1844 table['classes'] += ['colwidths-auto']
1845 elif widths: # "grid" or list of integers
1846 table['classes'] += ['colwidths-given']
1847 tgroup = nodes.tgroup(cols=len(colwidths))
1848 table += tgroup
1849 for colwidth in colwidths:
1850 colspec = nodes.colspec(colwidth=colwidth)
1851 if stub_columns:
1852 colspec.attributes['stub'] = True
1853 stub_columns -= 1
1854 tgroup += colspec
1855 if headrows:
1856 thead = nodes.thead()
1857 tgroup += thead
1858 for row in headrows:
1859 thead += self.build_table_row(row, tableline)
1860 tbody = nodes.tbody()
1861 tgroup += tbody
1862 for row in bodyrows:
1863 tbody += self.build_table_row(row, tableline)
1864 return table
1865
1866 def build_table_row(self, rowdata, tableline):
1867 row = nodes.row()
1868 for cell in rowdata:
1869 if cell is None:
1870 continue
1871 morerows, morecols, offset, cellblock = cell
1872 attributes = {}
1873 if morerows:
1874 attributes['morerows'] = morerows
1875 if morecols:
1876 attributes['morecols'] = morecols
1877 entry = nodes.entry(**attributes)
1878 row += entry
1879 if ''.join(cellblock):
1880 self.nested_parse(cellblock, input_offset=tableline+offset,
1881 node=entry)
1882 return row
1883
1884 explicit = Struct()
1885 """Patterns and constants used for explicit markup recognition."""
1886
1887 explicit.patterns = Struct(
1888 target=re.compile(r"""
1889 (
1890 _ # anonymous target
1891 | # *OR*
1892 (?!_) # no underscore at the beginning
1893 (?P<quote>`?) # optional open quote
1894 (?![ `]) # first char. not space or
1895 # backquote
1896 (?P<name> # reference name
1897 .+?
1898 )
1899 %(non_whitespace_escape_before)s
1900 (?P=quote) # close quote if open quote used
1901 )
1902 (?<!(?<!\x00):) # no unescaped colon at end
1903 %(non_whitespace_escape_before)s
1904 [ ]? # optional space
1905 : # end of reference name
1906 ([ ]+|$) # followed by whitespace
1907 """ % vars(Inliner), re.VERBOSE),
1908 reference=re.compile(r"""
1909 (
1910 (?P<simple>%(simplename)s)_
1911 | # *OR*
1912 ` # open backquote
1913 (?![ ]) # not space
1914 (?P<phrase>.+?) # hyperlink phrase
1915 %(non_whitespace_escape_before)s
1916 `_ # close backquote,
1917 # reference mark
1918 )
1919 $ # end of string
1920 """ % vars(Inliner), re.VERBOSE),
1921 substitution=re.compile(r"""
1922 (
1923 (?![ ]) # first char. not space
1924 (?P<name>.+?) # substitution text
1925 %(non_whitespace_escape_before)s
1926 \| # close delimiter
1927 )
1928 ([ ]+|$) # followed by whitespace
1929 """ % vars(Inliner),
1930 re.VERBOSE),)
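# For illustration: in a substitution definition such as
# ".. |logo| image:: logo.png", the substitution pattern above is applied
# to the text after the opening "|" and matches "logo| ", capturing
# "logo" as the substitution name (see substitution_def() below).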
1931
1932 def footnote(self, match):
1933 src, srcline = self.state_machine.get_source_and_line()
1934 (indented, indent, offset, blank_finish
1935 ) = self.state_machine.get_first_known_indented(match.end())
1936 label = match.group(1)
1937 name = normalize_name(label)
1938 footnote = nodes.footnote('\n'.join(indented))
1939 footnote.source = src
1940 footnote.line = srcline
1941 if name[0] == '#': # auto-numbered
1942 name = name[1:] # autonumber label
1943 footnote['auto'] = 1
1944 if name:
1945 footnote['names'].append(name)
1946 self.document.note_autofootnote(footnote)
1947 elif name == '*': # auto-symbol
1948 name = ''
1949 footnote['auto'] = '*'
1950 self.document.note_symbol_footnote(footnote)
1951 else: # manually numbered
1952 footnote += nodes.label('', label)
1953 footnote['names'].append(name)
1954 self.document.note_footnote(footnote)
1955 if name:
1956 self.document.note_explicit_target(footnote, footnote)
1957 else:
1958 self.document.set_id(footnote, footnote)
1959 if indented:
1960 self.nested_parse(indented, input_offset=offset, node=footnote)
1961 else:
1962 footnote += self.reporter.warning('Footnote content expected.')
1963 return [footnote], blank_finish
1964
1965 def citation(self, match):
1966 src, srcline = self.state_machine.get_source_and_line()
1967 (indented, indent, offset, blank_finish
1968 ) = self.state_machine.get_first_known_indented(match.end())
1969 label = match.group(1)
1970 name = normalize_name(label)
1971 citation = nodes.citation('\n'.join(indented))
1972 citation.source = src
1973 citation.line = srcline
1974 citation += nodes.label('', label)
1975 citation['names'].append(name)
1976 self.document.note_citation(citation)
1977 self.document.note_explicit_target(citation, citation)
1978 if indented:
1979 self.nested_parse(indented, input_offset=offset, node=citation)
1980 else:
1981 citation += self.reporter.warning('Citation content expected.')
1982 return [citation], blank_finish
1983
1984 def hyperlink_target(self, match):
1985 pattern = self.explicit.patterns.target
1986 lineno = self.state_machine.abs_line_number()
1987 (block, indent, offset, blank_finish
1988 ) = self.state_machine.get_first_known_indented(
1989 match.end(), until_blank=True, strip_indent=False)
1990 blocktext = match.string[:match.end()] + '\n'.join(block)
1991 block = [escape2null(line) for line in block]
1992 escaped = block[0]
1993 blockindex = 0
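# A target may wrap over several lines; keep appending escaped block lines
# until the accumulated text matches the target pattern, or report a
# malformed hyperlink target if the block runs out first.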
1994 while True:
1995 targetmatch = pattern.match(escaped)
1996 if targetmatch:
1997 break
1998 blockindex += 1
1999 try:
2000 escaped += block[blockindex]
2001 except IndexError:
2002 raise MarkupError('malformed hyperlink target.')
2003 del block[:blockindex]
2004 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
2005 target = self.make_target(block, blocktext, lineno,
2006 targetmatch.group('name'))
2007 return [target], blank_finish
2008
2009 def make_target(self, block, block_text, lineno, target_name):
2010 target_type, data = self.parse_target(block, block_text, lineno)
2011 if target_type == 'refname':
2012 target = nodes.target(block_text, '', refname=normalize_name(data))
2013 target.indirect_reference_name = data
2014 self.add_target(target_name, '', target, lineno)
2015 self.document.note_indirect_target(target)
2016 return target
2017 elif target_type == 'refuri':
2018 target = nodes.target(block_text, '')
2019 self.add_target(target_name, data, target, lineno)
2020 return target
2021 else:
2022 return data
2023
2024 def parse_target(self, block, block_text, lineno):
2025 """
2026 Determine the type of reference of a target.
2027
2028 :Return: A 2-tuple, one of:
2029
2030 - 'refname' and the indirect reference name
2031 - 'refuri' and the URI
2032 - 'malformed' and a system_message node
2033 """
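# Illustrative examples (hypothetical targets): a block ending in
# "other-target_" is an indirect target and yields
# ('refname', 'other-target'); a block such as "https://example.org"
# yields ('refuri', 'https://example.org') with escaped whitespace
# removed.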
2034 if block and block[-1].strip()[-1:] == '_': # possible indirect target
2035 reference = ' '.join(line.strip() for line in block)
2036 refname = self.is_reference(reference)
2037 if refname:
2038 return 'refname', refname
2039 ref_parts = split_escaped_whitespace(' '.join(block))
2040 reference = ' '.join(''.join(unescape(part).split())
2041 for part in ref_parts)
2042 return 'refuri', reference
2043
2044 def is_reference(self, reference):
2045 match = self.explicit.patterns.reference.match(
2046 whitespace_normalize_name(reference))
2047 if not match:
2048 return None
2049 return unescape(match.group('simple') or match.group('phrase'))
2050
2051 def add_target(self, targetname, refuri, target, lineno):
2052 target.line = lineno
2053 if targetname:
2054 name = normalize_name(unescape(targetname))
2055 target['names'].append(name)
2056 if refuri:
2057 uri = self.inliner.adjust_uri(refuri)
2058 if uri:
2059 target['refuri'] = uri
2060 else:
2061 raise ApplicationError('problem with URI: %r' % refuri)
2062 self.document.note_explicit_target(target, self.parent)
2063 else: # anonymous target
2064 if refuri:
2065 target['refuri'] = refuri
2066 target['anonymous'] = True
2067 self.document.note_anonymous_target(target)
2068
2069 def substitution_def(self, match):
2070 pattern = self.explicit.patterns.substitution
2071 src, srcline = self.state_machine.get_source_and_line()
2072 (block, indent, offset, blank_finish
2073 ) = self.state_machine.get_first_known_indented(match.end(),
2074 strip_indent=False)
2075 blocktext = (match.string[:match.end()] + '\n'.join(block))
2076 block.disconnect()
2077 escaped = escape2null(block[0].rstrip())
2078 blockindex = 0
2079 while True:
2080 subdefmatch = pattern.match(escaped)
2081 if subdefmatch:
2082 break
2083 blockindex += 1
2084 try:
2085 escaped = escaped + ' ' + escape2null(
2086 block[blockindex].strip())
2087 except IndexError:
2088 raise MarkupError('malformed substitution definition.')
2089 del block[:blockindex] # strip out the substitution marker
2090 start = subdefmatch.end()-len(escaped)-1
2091 block[0] = (block[0].strip() + ' ')[start:-1]
2092 if not block[0]:
2093 del block[0]
2094 offset += 1
2095 while block and not block[-1].strip():
2096 block.pop()
2097 subname = subdefmatch.group('name')
2098 substitution_node = nodes.substitution_definition(blocktext)
2099 substitution_node.source = src
2100 substitution_node.line = srcline
2101 if not block:
2102 msg = self.reporter.warning(
2103 'Substitution definition "%s" missing contents.' % subname,
2104 nodes.literal_block(blocktext, blocktext),
2105 source=src, line=srcline)
2106 return [msg], blank_finish
2107 block[0] = block[0].strip()
2108 substitution_node['names'].append(
2109 nodes.whitespace_normalize_name(subname))
2110 new_abs_offset, blank_finish = self.nested_list_parse(
2111 block, input_offset=offset, node=substitution_node,
2112 initial_state='SubstitutionDef', blank_finish=blank_finish)
2113 i = 0
2114 for node in substitution_node[:]:
2115 if not (isinstance(node, nodes.Inline)
2116 or isinstance(node, nodes.Text)):
2117 self.parent += substitution_node[i]
2118 del substitution_node[i]
2119 else:
2120 i += 1
2121 for node in substitution_node.findall(nodes.Element):
2122 if self.disallowed_inside_substitution_definitions(node):
2123 pformat = nodes.literal_block('', node.pformat().rstrip())
2124 msg = self.reporter.error(
2125 'Substitution definition contains illegal element <%s>:'
2126 % node.tagname,
2127 pformat, nodes.literal_block(blocktext, blocktext),
2128 source=src, line=srcline)
2129 return [msg], blank_finish
2130 if len(substitution_node) == 0:
2131 msg = self.reporter.warning(
2132 'Substitution definition "%s" empty or invalid.' % subname,
2133 nodes.literal_block(blocktext, blocktext),
2134 source=src, line=srcline)
2135 return [msg], blank_finish
2136 self.document.note_substitution_def(
2137 substitution_node, subname, self.parent)
2138 return [substitution_node], blank_finish
2139
2140 def disallowed_inside_substitution_definitions(self, node) -> bool:
2141 if (node['ids']
2142 or isinstance(node, nodes.reference) and node.get('anonymous')
2143 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2144 return True
2145 else:
2146 return False
2147
2148 def directive(self, match, **option_presets):
2149 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2150 type_name = match.group(1)
2151 directive_class, messages = directives.directive(
2152 type_name, self.memo.language, self.document)
2153 self.parent += messages
2154 if directive_class:
2155 return self.run_directive(
2156 directive_class, match, type_name, option_presets)
2157 else:
2158 return self.unknown_directive(type_name)
2159
2160 def run_directive(self, directive, match, type_name, option_presets):
2161 """
2162 Parse a directive then run its directive function.
2163
2164 Parameters:
2165
2166 - `directive`: The class implementing the directive. Must be
2167 a subclass of `rst.Directive`.
2168
2169 - `match`: A regular expression match object which matched the first
2170 line of the directive.
2171
2172 - `type_name`: The directive name, as used in the source text.
2173
2174 - `option_presets`: A dictionary of preset options, defaults for the
2175 directive options. Currently, only an "alt" option is passed by
2176 substitution definitions (value: the substitution name), which may
2177 be used by an embedded image directive.
2178
2179 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2180 """
2181 if isinstance(directive, (FunctionType, MethodType)):
2182 from docutils.parsers.rst import convert_directive_function
2183 directive = convert_directive_function(directive)
2184 lineno = self.state_machine.abs_line_number()
2185 initial_line_offset = self.state_machine.line_offset
2186 (indented, indent, line_offset, blank_finish
2187 ) = self.state_machine.get_first_known_indented(match.end(),
2188 strip_top=0)
2189 block_text = '\n'.join(self.state_machine.input_lines[
2190 initial_line_offset : self.state_machine.line_offset + 1]) # noqa: E203,E501
2191 try:
2192 arguments, options, content, content_offset = (
2193 self.parse_directive_block(indented, line_offset,
2194 directive, option_presets))
2195 except MarkupError as detail:
2196 error = self.reporter.error(
2197 'Error in "%s" directive:\n%s.' % (type_name,
2198 ' '.join(detail.args)),
2199 nodes.literal_block(block_text, block_text), line=lineno)
2200 return [error], blank_finish
2201 directive_instance = directive(
2202 type_name, arguments, options, content, lineno,
2203 content_offset, block_text, self, self.state_machine)
2204 try:
2205 result = directive_instance.run()
2206 except docutils.parsers.rst.DirectiveError as error:
2207 msg_node = self.reporter.system_message(error.level, error.msg,
2208 line=lineno)
2209 msg_node += nodes.literal_block(block_text, block_text)
2210 result = [msg_node]
2211 assert isinstance(result, list), \
2212 'Directive "%s" must return a list of nodes.' % type_name
2213 for i in range(len(result)):
2214 assert isinstance(result[i], nodes.Node), \
2215 ('Directive "%s" returned non-Node object (index %s): %r'
2216 % (type_name, i, result[i]))
2217 return (result,
2218 blank_finish or self.state_machine.is_next_line_blank())
2219
2220 def parse_directive_block(self, indented, line_offset, directive,
2221 option_presets):
2222 option_spec = directive.option_spec
2223 has_content = directive.has_content
2224 if indented and not indented[0].strip():
2225 indented.trim_start()
2226 line_offset += 1
2227 while indented and not indented[-1].strip():
2228 indented.trim_end()
2229 if indented and (directive.required_arguments
2230 or directive.optional_arguments
2231 or option_spec):
2232 for i, line in enumerate(indented):
2233 if not line.strip():
2234 break
2235 else:
2236 i += 1
2237 arg_block = indented[:i]
2238 content = indented[i+1:]
2239 content_offset = line_offset + i + 1
2240 else:
2241 content = indented
2242 content_offset = line_offset
2243 arg_block = []
2244 if option_spec:
2245 options, arg_block = self.parse_directive_options(
2246 option_presets, option_spec, arg_block)
2247 else:
2248 options = {}
2249 if arg_block and not (directive.required_arguments
2250 or directive.optional_arguments):
2251 content = arg_block + indented[i:]
2252 content_offset = line_offset
2253 arg_block = []
2254 while content and not content[0].strip():
2255 content.trim_start()
2256 content_offset += 1
2257 if directive.required_arguments or directive.optional_arguments:
2258 arguments = self.parse_directive_arguments(
2259 directive, arg_block)
2260 else:
2261 arguments = []
2262 if content and not has_content:
2263 raise MarkupError('no content permitted')
2264 return arguments, options, content, content_offset
2265
2266 def parse_directive_options(self, option_presets, option_spec, arg_block):
2267 options = option_presets.copy()
2268 for i, line in enumerate(arg_block):
2269 if re.match(Body.patterns['field_marker'], line):
2270 opt_block = arg_block[i:]
2271 arg_block = arg_block[:i]
2272 break
2273 else:
2274 opt_block = []
2275 if opt_block:
2276 success, data = self.parse_extension_options(option_spec,
2277 opt_block)
2278 if success: # data is a dict of options
2279 options.update(data)
2280 else: # data is an error string
2281 raise MarkupError(data)
2282 return options, arg_block
2283
2284 def parse_directive_arguments(self, directive, arg_block):
2285 required = directive.required_arguments
2286 optional = directive.optional_arguments
2287 arg_text = '\n'.join(arg_block)
2288 arguments = arg_text.split()
2289 if len(arguments) < required:
2290 raise MarkupError('%s argument(s) required, %s supplied'
2291 % (required, len(arguments)))
2292 elif len(arguments) > required + optional:
2293 if directive.final_argument_whitespace:
2294 arguments = arg_text.split(None, required + optional - 1)
2295 else:
2296 raise MarkupError(
2297 'maximum %s argument(s) allowed, %s supplied'
2298 % (required + optional, len(arguments)))
2299 return arguments
2300
2301 def parse_extension_options(self, option_spec, datalines):
2302 """
2303 Parse `datalines` for a field list containing extension options
2304 matching `option_spec`.
2305
2306 :Parameters:
2307 - `option_spec`: a mapping of option name to conversion
2308 function, which should raise an exception on bad input.
2309 - `datalines`: a list of input strings.
2310
2311 :Return:
2312 - Success value, 1 or 0.
2313 - An option dictionary on success, an error string on failure.
2314 """
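# Sketch with a hypothetical option_spec: given
# option_spec = {'alt': directives.unchanged} and
# datalines = [':alt: replacement text'], a successful parse returns
# (1, {'alt': 'replacement text'}); an unknown field name would instead
# return (0, 'unknown option: ...').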
2315 node = nodes.field_list()
2316 newline_offset, blank_finish = self.nested_list_parse(
2317 datalines, 0, node, initial_state='ExtensionOptions',
2318 blank_finish=True)
2319 if newline_offset != len(datalines): # incomplete parse of block
2320 return 0, 'invalid option block'
2321 try:
2322 options = utils.extract_extension_options(node, option_spec)
2323 except KeyError as detail:
2324 return 0, 'unknown option: "%s"' % detail.args[0]
2325 except (ValueError, TypeError) as detail:
2326 return 0, 'invalid option value: %s' % ' '.join(detail.args)
2327 except utils.ExtensionOptionError as detail:
2328 return 0, 'invalid option data: %s' % ' '.join(detail.args)
2329 if blank_finish:
2330 return 1, options
2331 else:
2332 return 0, 'option data incompletely parsed'
2333
2334 def unknown_directive(self, type_name):
2335 lineno = self.state_machine.abs_line_number()
2336 (indented, indent, offset, blank_finish
2337 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2338 text = '\n'.join(indented)
2339 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2340 nodes.literal_block(text, text),
2341 line=lineno)
2342 return [error], blank_finish
2343
2344 def comment(self, match):
2345 if self.state_machine.is_next_line_blank():
2346 first_comment_line = match.string[match.end():]
2347 if not first_comment_line.strip(): # empty comment
2348 return [nodes.comment()], True # "A tiny but practical wart."
2349 if first_comment_line.startswith('end of inclusion from "'):
2350 # cf. parsers.rst.directives.misc.Include
2351 self.document.include_log.pop()
2352 return [], True
2353 (indented, indent, offset, blank_finish
2354 ) = self.state_machine.get_first_known_indented(match.end())
2355 while indented and not indented[-1].strip():
2356 indented.trim_end()
2357 text = '\n'.join(indented)
2358 return [nodes.comment(text, text)], blank_finish
2359
2360 explicit.constructs = [
2361 (footnote,
2362 re.compile(r"""
2363 \.\.[ ]+ # explicit markup start
2364 \[
2365 ( # footnote label:
2366 [0-9]+ # manually numbered footnote
2367 | # *OR*
2368 \# # anonymous auto-numbered footnote
2369 | # *OR*
2370 \#%s # auto-numbered footnote with a label
2371 | # *OR*
2372 \* # auto-symbol footnote
2373 )
2374 \]
2375 ([ ]+|$) # whitespace or end of line
2376 """ % Inliner.simplename, re.VERBOSE)),
2377 (citation,
2378 re.compile(r"""
2379 \.\.[ ]+ # explicit markup start
2380 \[(%s)\] # citation label
2381 ([ ]+|$) # whitespace or end of line
2382 """ % Inliner.simplename, re.VERBOSE)),
2383 (hyperlink_target,
2384 re.compile(r"""
2385 \.\.[ ]+ # explicit markup start
2386 _ # target indicator
2387 (?![ ]|$) # first char. not space or EOL
2388 """, re.VERBOSE)),
2389 (substitution_def,
2390 re.compile(r"""
2391 \.\.[ ]+ # explicit markup start
2392 \| # substitution indicator
2393 (?![ ]|$) # first char. not space or EOL
2394 """, re.VERBOSE)),
2395 (directive,
2396 re.compile(r"""
2397 \.\.[ ]+ # explicit markup start
2398 (%s) # directive name
2399 [ ]? # optional space
2400 :: # directive delimiter
2401 ([ ]+|$) # whitespace or end of line
2402 """ % Inliner.simplename, re.VERBOSE))]
2403
2404 def explicit_markup(self, match, context, next_state):
2405 """Footnotes, hyperlink targets, directives, comments."""
2406 nodelist, blank_finish = self.explicit_construct(match)
2407 self.parent += nodelist
2408 self.explicit_list(blank_finish)
2409 return [], next_state, []
2410
2411 def explicit_construct(self, match):
2412 """Determine which explicit construct this is, parse & return it."""
2413 errors = []
2414 for method, pattern in self.explicit.constructs:
2415 expmatch = pattern.match(match.string)
2416 if expmatch:
2417 try:
2418 return method(self, expmatch)
2419 except MarkupError as error:
2420 lineno = self.state_machine.abs_line_number()
2421 message = ' '.join(error.args)
2422 errors.append(self.reporter.warning(message, line=lineno))
2423 break
2424 nodelist, blank_finish = self.comment(match)
2425 return nodelist + errors, blank_finish
2426
2427 def explicit_list(self, blank_finish) -> None:
2428 """
2429 Create a nested state machine for a series of explicit markup
2430 constructs (including anonymous hyperlink targets).
2431 """
2432 offset = self.state_machine.line_offset + 1 # next line
2433 newline_offset, blank_finish = self.nested_list_parse(
2434 self.state_machine.input_lines[offset:],
2435 input_offset=self.state_machine.abs_line_offset() + 1,
2436 node=self.parent, initial_state='Explicit',
2437 blank_finish=blank_finish,
2438 match_titles=self.state_machine.match_titles)
2439 self.goto_line(newline_offset)
2440 if not blank_finish:
2441 self.parent += self.unindent_warning('Explicit markup')
2442
2443 def anonymous(self, match, context, next_state):
2444 """Anonymous hyperlink targets."""
2445 nodelist, blank_finish = self.anonymous_target(match)
2446 self.parent += nodelist
2447 self.explicit_list(blank_finish)
2448 return [], next_state, []
2449
2450 def anonymous_target(self, match):
2451 lineno = self.state_machine.abs_line_number()
2452 (block, indent, offset, blank_finish
2453 ) = self.state_machine.get_first_known_indented(match.end(),
2454 until_blank=True)
2455 blocktext = match.string[:match.end()] + '\n'.join(block)
2456 block = [escape2null(line) for line in block]
2457 target = self.make_target(block, blocktext, lineno, '')
2458 return [target], blank_finish
2459
2460 def line(self, match, context, next_state):
2461 """Section title overline or transition marker."""
2462 if self.state_machine.match_titles:
2463 return [match.string], 'Line', []
2464 elif match.string.strip() == '::':
2465 raise statemachine.TransitionCorrection('text')
2466 elif len(match.string.strip()) < 4:
2467 msg = self.reporter.info(
2468 'Unexpected possible title overline or transition.\n'
2469 "Treating it as ordinary text because it's so short.",
2470 line=self.state_machine.abs_line_number())
2471 self.parent += msg
2472 raise statemachine.TransitionCorrection('text')
2473 else:
2474 blocktext = self.state_machine.line
2475 msg = self.reporter.error(
2476 'Unexpected section title or transition.',
2477 nodes.literal_block(blocktext, blocktext),
2478 line=self.state_machine.abs_line_number())
2479 self.parent += msg
2480 return [], next_state, []
2481
2482 def text(self, match, context, next_state):
2483 """Titles, definition lists, paragraphs."""
2484 return [match.string], 'Text', []
2485
2486
2487class RFC2822Body(Body):
2488
2489 """
2490 RFC2822 headers are only valid as the first constructs in documents. As
2491 soon as anything else appears, the `Body` state should take over.
2492 """
2493
2494 patterns = Body.patterns.copy() # can't modify the original
2495 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2496 initial_transitions = [(name, 'Body')
2497 for name in Body.initial_transitions]
2498 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
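# The 'rfc2822' pattern above matches header-style lines such as
# "Author: J. Doe" or "Date:" (a run of printable, non-space characters
# other than ':', followed by a colon and whitespace or end of line).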
2499
2500 def rfc2822(self, match, context, next_state):
2501 """RFC2822-style field list item."""
2502 fieldlist = nodes.field_list(classes=['rfc2822'])
2503 self.parent += fieldlist
2504 field, blank_finish = self.rfc2822_field(match)
2505 fieldlist += field
2506 offset = self.state_machine.line_offset + 1 # next line
2507 newline_offset, blank_finish = self.nested_list_parse(
2508 self.state_machine.input_lines[offset:],
2509 input_offset=self.state_machine.abs_line_offset() + 1,
2510 node=fieldlist, initial_state='RFC2822List',
2511 blank_finish=blank_finish)
2512 self.goto_line(newline_offset)
2513 if not blank_finish:
2514 self.parent += self.unindent_warning(
2515 'RFC2822-style field list')
2516 return [], next_state, []
2517
2518 def rfc2822_field(self, match):
2519 name = match.string[:match.string.find(':')]
2520 (indented, indent, line_offset, blank_finish
2521 ) = self.state_machine.get_first_known_indented(match.end(),
2522 until_blank=True)
2523 fieldnode = nodes.field()
2524 fieldnode += nodes.field_name(name, name)
2525 fieldbody = nodes.field_body('\n'.join(indented))
2526 fieldnode += fieldbody
2527 if indented:
2528 self.nested_parse(indented, input_offset=line_offset,
2529 node=fieldbody)
2530 return fieldnode, blank_finish
2531
2532
2533class SpecializedBody(Body):
2534
2535 """
2536 Superclass for second and subsequent compound element members. Compound
2537 elements are lists and list-like constructs.
2538
2539 All transition methods are disabled (redefined as `invalid_input`).
2540 Override individual methods in subclasses to re-enable.
2541
2542 For example, once an initial bullet list item, say, is recognized, the
2543 `BulletList` subclass takes over, with a "bullet_list" node as its
2544 container. Upon encountering the initial bullet list item, `Body.bullet`
2545 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2546 starts up a nested parsing session with `BulletList` as the initial state.
2547 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2548 as only bullet list items are encountered, they are parsed and inserted
2549 into the container. The first construct which is *not* a bullet list item
2550 triggers the `invalid_input` method, which ends the nested parse and
2551 closes the container. `BulletList` needs to recognize input that is
2552 invalid in the context of a bullet list, which means everything *other
2553 than* bullet list items, so it inherits the transition list created in
2554 `Body`.
2555 """
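# For instance (illustrative input): while nested-parsing
#   * item one
#   * item two
#   a paragraph
# BulletList consumes both items; the paragraph line triggers
# invalid_input(), which backs up one line and raises EOFError so the
# parent state machine can re-examine it.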
2556
2557 def invalid_input(self, match=None, context=None, next_state=None):
2558 """Not a compound element member. Abort this state machine."""
2559 self.state_machine.previous_line() # back up so parent SM can reassess
2560 raise EOFError
2561
2562 indent = invalid_input
2563 bullet = invalid_input
2564 enumerator = invalid_input
2565 field_marker = invalid_input
2566 option_marker = invalid_input
2567 doctest = invalid_input
2568 line_block = invalid_input
2569 grid_table_top = invalid_input
2570 simple_table_top = invalid_input
2571 explicit_markup = invalid_input
2572 anonymous = invalid_input
2573 line = invalid_input
2574 text = invalid_input
2575
2576
2577class BulletList(SpecializedBody):
2578
2579 """Second and subsequent bullet_list list_items."""
2580
2581 def bullet(self, match, context, next_state):
2582 """Bullet list item."""
2583 if match.string[0] != self.parent['bullet']:
2584 # different bullet: new list
2585 self.invalid_input()
2586 listitem, blank_finish = self.list_item(match.end())
2587 self.parent += listitem
2588 self.blank_finish = blank_finish
2589 return [], next_state, []
2590
2591
2592class DefinitionList(SpecializedBody):
2593
2594 """Second and subsequent definition_list_items."""
2595
2596 def text(self, match, context, next_state):
2597 """Definition lists."""
2598 return [match.string], 'Definition', []
2599
2600
2601class EnumeratedList(SpecializedBody):
2602
2603 """Second and subsequent enumerated_list list_items."""
2604
2605 def enumerator(self, match, context, next_state):
2606 """Enumerated list item."""
2607 format, sequence, text, ordinal = self.parse_enumerator(
2608 match, self.parent['enumtype'])
2609 if (format != self.format
2610 or (sequence != '#' and (sequence != self.parent['enumtype']
2611 or self.auto
2612 or ordinal != (self.lastordinal + 1)))
2613 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2614 # different enumeration: new list
2615 self.invalid_input()
2616 if sequence == '#':
2617 self.auto = 1
2618 listitem, blank_finish = self.list_item(match.end())
2619 self.parent += listitem
2620 self.blank_finish = blank_finish
2621 self.lastordinal = ordinal
2622 return [], next_state, []
2623
2624
2625class FieldList(SpecializedBody):
2626
2627 """Second and subsequent field_list fields."""
2628
2629 def field_marker(self, match, context, next_state):
2630 """Field list field."""
2631 field, blank_finish = self.field(match)
2632 self.parent += field
2633 self.blank_finish = blank_finish
2634 return [], next_state, []
2635
2636
2637class OptionList(SpecializedBody):
2638
2639 """Second and subsequent option_list option_list_items."""
2640
2641 def option_marker(self, match, context, next_state):
2642 """Option list item."""
2643 try:
2644 option_list_item, blank_finish = self.option_list_item(match)
2645 except MarkupError:
2646 self.invalid_input()
2647 self.parent += option_list_item
2648 self.blank_finish = blank_finish
2649 return [], next_state, []
2650
2651
2652class RFC2822List(SpecializedBody, RFC2822Body):
2653
2654 """Second and subsequent RFC2822-style field_list fields."""
2655
2656 patterns = RFC2822Body.patterns
2657 initial_transitions = RFC2822Body.initial_transitions
2658
2659 def rfc2822(self, match, context, next_state):
2660 """RFC2822-style field list item."""
2661 field, blank_finish = self.rfc2822_field(match)
2662 self.parent += field
2663 self.blank_finish = blank_finish
2664 return [], 'RFC2822List', []
2665
2666 blank = SpecializedBody.invalid_input
2667
2668
2669class ExtensionOptions(FieldList):
2670
2671 """
2672 Parse field_list fields for extension options.
2673
2674 No nested parsing is done (including inline markup parsing).
2675 """
2676
2677 def parse_field_body(self, indented, offset, node) -> None:
2678 """Override `Body.parse_field_body` for simpler parsing."""
2679 lines = []
2680 for line in list(indented) + ['']:
2681 if line.strip():
2682 lines.append(line)
2683 elif lines:
2684 text = '\n'.join(lines)
2685 node += nodes.paragraph(text, text)
2686 lines = []
2687
2688
2689class LineBlock(SpecializedBody):
2690
2691 """Second and subsequent lines of a line_block."""
2692
2693 blank = SpecializedBody.invalid_input
2694
2695 def line_block(self, match, context, next_state):
2696 """New line of line block."""
2697 lineno = self.state_machine.abs_line_number()
2698 line, messages, blank_finish = self.line_block_line(match, lineno)
2699 self.parent += line
2700 self.parent.parent += messages
2701 self.blank_finish = blank_finish
2702 return [], next_state, []
2703
2704
2705class Explicit(SpecializedBody):
2706
2707 """Second and subsequent explicit markup constructs."""
2708
2709 def explicit_markup(self, match, context, next_state):
2710 """Footnotes, hyperlink targets, directives, comments."""
2711 nodelist, blank_finish = self.explicit_construct(match)
2712 self.parent += nodelist
2713 self.blank_finish = blank_finish
2714 return [], next_state, []
2715
2716 def anonymous(self, match, context, next_state):
2717 """Anonymous hyperlink targets."""
2718 nodelist, blank_finish = self.anonymous_target(match)
2719 self.parent += nodelist
2720 self.blank_finish = blank_finish
2721 return [], next_state, []
2722
2723 blank = SpecializedBody.invalid_input
2724
2725
2726class SubstitutionDef(Body):
2727
2728 """
2729 Parser for the contents of a substitution_definition element.
2730 """
2731
2732 patterns = {
2733 'embedded_directive': re.compile(r'(%s)::( +|$)'
2734 % Inliner.simplename),
2735 'text': r''}
2736 initial_transitions = ['embedded_directive', 'text']
2737
2738 def embedded_directive(self, match, context, next_state):
2739 nodelist, blank_finish = self.directive(match,
2740 alt=self.parent['names'][0])
2741 self.parent += nodelist
2742 if not self.state_machine.at_eof():
2743 self.blank_finish = blank_finish
2744 raise EOFError
2745
2746 def text(self, match, context, next_state):
2747 if not self.state_machine.at_eof():
2748 self.blank_finish = self.state_machine.is_next_line_blank()
2749 raise EOFError
2750
2751
2752class Text(RSTState):
2753
2754 """
2755 Classifier of second line of a text block.
2756
2757 Could be a paragraph, a definition list item, or a title.
2758 """
2759
2760 patterns = {'underline': Body.patterns['line'],
2761 'text': r''}
2762 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2763
2764 def blank(self, match, context, next_state):
2765 """End of paragraph."""
2766 # NOTE: self.paragraph returns [node, system_message(s)], literalnext
2767 paragraph, literalnext = self.paragraph(
2768 context, self.state_machine.abs_line_number() - 1)
2769 self.parent += paragraph
2770 if literalnext:
2771 self.parent += self.literal_block()
2772 return [], 'Body', []
2773
2774 def eof(self, context):
2775 if context:
2776 self.blank(None, context, None)
2777 return []
2778
2779 def indent(self, match, context, next_state):
2780 """Definition list item."""
2781 dl = nodes.definition_list()
2782 # the definition list starts on the line before the indent:
2783 lineno = self.state_machine.abs_line_number() - 1
2784 dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
2785 dl_item, blank_finish = self.definition_list_item(context)
2786 dl += dl_item
2787 self.parent += dl
2788 offset = self.state_machine.line_offset + 1 # next line
2789 newline_offset, blank_finish = self.nested_list_parse(
2790 self.state_machine.input_lines[offset:],
2791 input_offset=self.state_machine.abs_line_offset() + 1,
2792 node=dl, initial_state='DefinitionList',
2793 blank_finish=blank_finish, blank_finish_state='Definition')
2794 self.goto_line(newline_offset)
2795 if not blank_finish:
2796 self.parent += self.unindent_warning('Definition list')
2797 return [], 'Body', []
2798
2799 def underline(self, match, context, next_state):
2800 """Section title."""
2801 lineno = self.state_machine.abs_line_number()
2802 title = context[0].rstrip()
2803 underline = match.string.rstrip()
2804 source = title + '\n' + underline
2805 messages = []
2806 if column_width(title) > len(underline):
2807 if len(underline) < 4:
2808 if self.state_machine.match_titles:
2809 msg = self.reporter.info(
2810 'Possible title underline, too short for the title.\n'
2811 "Treating it as ordinary text because it's so short.",
2812 line=lineno)
2813 self.parent += msg
2814 raise statemachine.TransitionCorrection('text')
2815 else:
2816 blocktext = context[0] + '\n' + self.state_machine.line
2817 msg = self.reporter.warning(
2818 'Title underline too short.',
2819 nodes.literal_block(blocktext, blocktext),
2820 line=lineno)
2821 messages.append(msg)
2822 if not self.state_machine.match_titles:
2823 blocktext = context[0] + '\n' + self.state_machine.line
2824 # We need get_source_and_line() here to report correctly
2825 src, srcline = self.state_machine.get_source_and_line()
2826 # TODO: why is abs_line_number() == srcline+1
2827 # if the error is in a table (try with test_tables.py)?
2828 # print("get_source_and_line", srcline)
2829 # print("abs_line_number", self.state_machine.abs_line_number())
2830 msg = self.reporter.error(
2831 'Unexpected section title.',
2832 nodes.literal_block(blocktext, blocktext),
2833 source=src, line=srcline)
2834 self.parent += messages
2835 self.parent += msg
2836 return [], next_state, []
2837 style = underline[0]
2838 context[:] = []
2839 self.section(title, source, style, lineno - 1, messages)
2840 return [], next_state, []
2841
2842 def text(self, match, context, next_state):
2843 """Paragraph."""
2844 startline = self.state_machine.abs_line_number() - 1
2845 msg = None
2846 try:
2847 block = self.state_machine.get_text_block(flush_left=True)
2848 except statemachine.UnexpectedIndentationError as err:
2849 block, src, srcline = err.args
2850 msg = self.reporter.error('Unexpected indentation.',
2851 source=src, line=srcline)
2852 lines = context + list(block)
2853 paragraph, literalnext = self.paragraph(lines, startline)
2854 self.parent += paragraph
2855 self.parent += msg
2856 if literalnext:
2857 try:
2858 self.state_machine.next_line()
2859 except EOFError:
2860 pass
2861 self.parent += self.literal_block()
2862 return [], next_state, []
2863
2864 def literal_block(self):
2865 """Return a list of nodes."""
2866 (indented, indent, offset, blank_finish
2867 ) = self.state_machine.get_indented()
2868 while indented and not indented[-1].strip():
2869 indented.trim_end()
2870 if not indented:
2871 return self.quoted_literal_block()
2872 data = '\n'.join(indented)
2873 literal_block = nodes.literal_block(data, data)
2874 (literal_block.source,
2875 literal_block.line) = self.state_machine.get_source_and_line(offset+1)
2876 nodelist = [literal_block]
2877 if not blank_finish:
2878 nodelist.append(self.unindent_warning('Literal block'))
2879 return nodelist
2880
2881 def quoted_literal_block(self):
2882 abs_line_offset = self.state_machine.abs_line_offset()
2883 offset = self.state_machine.line_offset
2884 parent_node = nodes.Element()
2885 new_abs_offset = self.nested_parse(
2886 self.state_machine.input_lines[offset:],
2887 input_offset=abs_line_offset, node=parent_node, match_titles=False,
2888 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2889 'initial_state': 'QuotedLiteralBlock'})
2890 self.goto_line(new_abs_offset)
2891 return parent_node.children
2892
2893 def definition_list_item(self, termline):
2894 # the parser is already on the second (indented) line:
2895 dd_lineno = self.state_machine.abs_line_number()
2896 dt_lineno = dd_lineno - 1
2897 (indented, indent, line_offset, blank_finish
2898 ) = self.state_machine.get_indented()
2899 dl_item = nodes.definition_list_item(
2900 '\n'.join(termline + list(indented)))
2901 (dl_item.source,
2902 dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
2903 dt_nodes, messages = self.term(termline, dt_lineno)
2904 dl_item += dt_nodes
2905 dd = nodes.definition('', *messages)
2906 dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
2907 dl_item += dd
2908 if termline[0][-2:] == '::':
2909 dd += self.reporter.info(
2910 'Blank line missing before literal block (after the "::")? '
2911 'Interpreted as a definition list item.',
2912 line=dd_lineno)
2913 # TODO: drop a definition if it is an empty comment to allow
2914 # definition list items with several terms?
2915 # https://sourceforge.net/p/docutils/feature-requests/60/
2916 self.nested_parse(indented, input_offset=line_offset, node=dd)
2917 return dl_item, blank_finish
2918
2919 classifier_delimiter = re.compile(' +: +')
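# Illustrative term line: "term : classifier one : classifier two" is
# split on " : " by term() below, producing a term node for "term" and
# one classifier node per remaining part.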
2920
2921 def term(self, lines, lineno):
2922 """Return a definition_list's term and optional classifiers."""
2923 assert len(lines) == 1
2924 text_nodes, messages = self.inline_text(lines[0], lineno)
2925 dt = nodes.term(lines[0])
2926 dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
2927 node_list = [dt]
2928 for i in range(len(text_nodes)):
2929 node = text_nodes[i]
2930 if isinstance(node, nodes.Text):
2931 parts = self.classifier_delimiter.split(node)
2932 if len(parts) == 1:
2933 node_list[-1] += node
2934 else:
2935 text = parts[0].rstrip()
2936 textnode = nodes.Text(text)
2937 node_list[-1] += textnode
2938 node_list += [nodes.classifier(unescape(part, True), part)
2939 for part in parts[1:]]
2940 else:
2941 node_list[-1] += node
2942 return node_list, messages
2943
2944
2945class SpecializedText(Text):
2946
2947 """
2948 Superclass for second and subsequent lines of Text-variants.
2949
2950 All transition methods are disabled. Override individual methods in
2951 subclasses to re-enable.
2952 """
2953
2954 def eof(self, context):
2955 """Incomplete construct."""
2956 return []
2957
2958 def invalid_input(self, match=None, context=None, next_state=None):
2959 """Not a compound element member. Abort this state machine."""
2960 raise EOFError
2961
2962 blank = invalid_input
2963 indent = invalid_input
2964 underline = invalid_input
2965 text = invalid_input
2966
2967
2968class Definition(SpecializedText):
2969
2970 """Second line of potential definition_list_item."""
2971
2972 def eof(self, context):
2973 """Not a definition."""
2974 self.state_machine.previous_line(2) # so parent SM can reassess
2975 return []
2976
2977 def indent(self, match, context, next_state):
2978 """Definition list item."""
2979 dl_item, blank_finish = self.definition_list_item(context)
2980 self.parent += dl_item
2981 self.blank_finish = blank_finish
2982 return [], 'DefinitionList', []
2983
2984
2985class Line(SpecializedText):
2986
2987 """
2988 Second line of over- & underlined section title or transition marker.
2989 """
2990
2991 eofcheck = 1 # ignored, will be removed in Docutils 2.0.
2992
2993 def eof(self, context):
2994 """Transition marker at end of section or document."""
2995 marker = context[0].strip()
2996 if len(marker) < 4:
2997 self.state_correction(context)
2998 src, srcline = self.state_machine.get_source_and_line()
2999 # lineno = self.state_machine.abs_line_number() - 1
3000 transition = nodes.transition(rawsource=context[0])
3001 transition.source = src
3002 transition.line = srcline - 1
3003 # transition.line = lineno
3004 self.parent += transition
3005 return []
3006
3007 def blank(self, match, context, next_state):
3008 """Transition marker."""
3009 src, srcline = self.state_machine.get_source_and_line()
3010 marker = context[0].strip()
3011 if len(marker) < 4:
3012 self.state_correction(context)
3013 transition = nodes.transition(rawsource=marker)
3014 transition.source = src
3015 transition.line = srcline - 1
3016 self.parent += transition
3017 return [], 'Body', []
3018
3019 def text(self, match, context, next_state):
3020 """Potential over- & underlined title."""
3021 lineno = self.state_machine.abs_line_number() - 1
3022 overline = context[0]
3023 title = match.string
3024 underline = ''
3025 try:
3026 underline = self.state_machine.next_line()
3027 except EOFError:
3028 blocktext = overline + '\n' + title
3029 if len(overline.rstrip()) < 4:
3030 self.short_overline(context, blocktext, lineno, 2)
3031 else:
3032 msg = self.reporter.error(
3033 'Incomplete section title.',
3034 nodes.literal_block(blocktext, blocktext),
3035 line=lineno)
3036 self.parent += msg
3037 return [], 'Body', []
3038 source = '%s\n%s\n%s' % (overline, title, underline)
3039 overline = overline.rstrip()
3040 underline = underline.rstrip()
3041 if not self.transitions['underline'][0].match(underline):
3042 blocktext = overline + '\n' + title + '\n' + underline
3043 if len(overline.rstrip()) < 4:
3044 self.short_overline(context, blocktext, lineno, 2)
3045 else:
3046 msg = self.reporter.error(
3047 'Missing matching underline for section title overline.',
3048 nodes.literal_block(source, source),
3049 line=lineno)
3050 self.parent += msg
3051 return [], 'Body', []
3052 elif overline != underline:
3053 blocktext = overline + '\n' + title + '\n' + underline
3054 if len(overline.rstrip()) < 4:
3055 self.short_overline(context, blocktext, lineno, 2)
3056 else:
3057 msg = self.reporter.error(
3058 'Title overline & underline mismatch.',
3059 nodes.literal_block(source, source),
3060 line=lineno)
3061 self.parent += msg
3062 return [], 'Body', []
3063 title = title.rstrip()
3064 messages = []
3065 if column_width(title) > len(overline):
3066 blocktext = overline + '\n' + title + '\n' + underline
3067 if len(overline.rstrip()) < 4:
3068 self.short_overline(context, blocktext, lineno, 2)
3069 else:
3070 msg = self.reporter.warning(
3071 'Title overline too short.',
3072 nodes.literal_block(source, source),
3073 line=lineno)
3074 messages.append(msg)
3075 style = (overline[0], underline[0])
3076 self.section(title.lstrip(), source, style, lineno + 1, messages)
3077 return [], 'Body', []
3078
3079 indent = text # indented title
3080
3081 def underline(self, match, context, next_state):
3082 overline = context[0]
3083 blocktext = overline + '\n' + self.state_machine.line
3084 lineno = self.state_machine.abs_line_number() - 1
3085 if len(overline.rstrip()) < 4:
3086 self.short_overline(context, blocktext, lineno, 1)
3087 msg = self.reporter.error(
3088 'Invalid section title or transition marker.',
3089 nodes.literal_block(blocktext, blocktext),
3090 line=lineno)
3091 self.parent += msg
3092 return [], 'Body', []
3093
3094 def short_overline(self, context, blocktext, lineno, lines=1) -> None:
3095 msg = self.reporter.info(
3096 'Possible incomplete section title.\nTreating the overline as '
3097 "ordinary text because it's so short.",
3098 line=lineno)
3099 self.parent += msg
3100 self.state_correction(context, lines)
3101
3102 def state_correction(self, context, lines=1):
3103 self.state_machine.previous_line(lines)
3104 context[:] = []
3105 raise statemachine.StateCorrection('Body', 'text')
3106
3107
3108class QuotedLiteralBlock(RSTState):
3109
3110 """
3111 Nested parse handler for quoted (unindented) literal blocks.
3112
3113 Special-purpose. Not for inclusion in `state_classes`.
3114 """
3115
3116 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
3117 'text': r''}
3118 initial_transitions = ('initial_quoted', 'text')
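# Illustrative input (hypothetical): after a paragraph ending in "::", the
# unindented lines "> line one" and "> line two" form a quoted literal
# block; initial_quoted() fixes the quote character ('>') from the first
# line and quoted() requires each following line to start with the same
# character.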
3119
3120 def __init__(self, state_machine, debug=False) -> None:
3121 RSTState.__init__(self, state_machine, debug)
3122 self.messages = []
3123 self.initial_lineno = None
3124
3125 def blank(self, match, context, next_state):
3126 if context:
3127 raise EOFError
3128 else:
3129 return context, next_state, []
3130
3131 def eof(self, context):
3132 if context:
3133 src, srcline = self.state_machine.get_source_and_line(
3134 self.initial_lineno)
3135 text = '\n'.join(context)
3136 literal_block = nodes.literal_block(text, text)
3137 literal_block.source = src
3138 literal_block.line = srcline
3139 self.parent += literal_block
3140 else:
3141 self.parent += self.reporter.warning(
3142 'Literal block expected; none found.',
3143 line=self.state_machine.abs_line_number()
3144 ) # src not available, statemachine.input_lines is empty
3145 self.state_machine.previous_line()
3146 self.parent += self.messages
3147 return []
3148
3149 def indent(self, match, context, next_state):
3150 assert context, ('QuotedLiteralBlock.indent: context should not '
3151 'be empty!')
3152 self.messages.append(
3153 self.reporter.error('Unexpected indentation.',
3154 line=self.state_machine.abs_line_number()))
3155 self.state_machine.previous_line()
3156 raise EOFError
3157
3158 def initial_quoted(self, match, context, next_state):
3159 """Match arbitrary quote character on the first line only."""
3160 self.remove_transition('initial_quoted')
3161 quote = match.string[0]
3162 pattern = re.compile(re.escape(quote))
3163 # New transition matches consistent quotes only:
3164 self.add_transition('quoted',
3165 (pattern, self.quoted, self.__class__.__name__))
3166 self.initial_lineno = self.state_machine.abs_line_number()
3167 return [match.string], next_state, []
3168
3169 def quoted(self, match, context, next_state):
3170 """Match consistent quotes on subsequent lines."""
3171 context.append(match.string)
3172 return context, next_state, []
3173
3174 def text(self, match, context, next_state):
3175 if context:
3176 self.messages.append(
3177 self.reporter.error('Inconsistent literal block quoting.',
3178 line=self.state_machine.abs_line_number()))
3179 self.state_machine.previous_line()
3180 raise EOFError
3181
3182
3183state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
3184 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
3185 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
3186"""Standard set of State classes used to start `RSTStateMachine`."""