# $Id$
# Author: David Goodger <goodger@python.org>
# Copyright: This module has been placed in the public domain.

"""
This is the ``docutils.parsers.rst.states`` module, the core of
the reStructuredText parser. It defines the following:

:Classes:
    - `RSTStateMachine`: reStructuredText parser's entry point.
    - `NestedStateMachine`: recursive StateMachine.
    - `RSTState`: reStructuredText State superclass.
    - `Inliner`: For parsing inline markup.
    - `Body`: Generic classifier of the first line of a block.
    - `SpecializedBody`: Superclass for compound element members.
    - `BulletList`: Second and subsequent bullet_list list_items.
    - `DefinitionList`: Second+ definition_list_items.
    - `EnumeratedList`: Second+ enumerated_list list_items.
    - `FieldList`: Second+ fields.
    - `OptionList`: Second+ option_list_items.
    - `RFC2822List`: Second+ RFC2822-style fields.
    - `ExtensionOptions`: Parses directive option fields.
    - `Explicit`: Second+ explicit markup constructs.
    - `SubstitutionDef`: For embedded directives in substitution definitions.
    - `Text`: Classifier of second line of a text block.
    - `SpecializedText`: Superclass for continuation lines of Text-variants.
    - `Definition`: Second line of potential definition_list_item.
    - `Line`: Second line of overlined section title or transition marker.
    - `Struct`: obsolete, use `types.SimpleNamespace`.

:Exception classes:
    - `MarkupError`
    - `ParserError`
    - `MarkupMismatch`

:Functions:
    - `escape2null()`: Return a string, escape-backslashes converted to nulls.
    - `unescape()`: Return a string, nulls removed or restored to backslashes.

:Attributes:
    - `state_classes`: set of State classes used with `RSTStateMachine`.

Parser Overview
===============

The reStructuredText parser is implemented as a recursive state machine,
examining its input one line at a time. To understand how the parser works,
please first become familiar with the `docutils.statemachine` module. In the
description below, references are made to classes defined in this module;
please see the individual classes for details.

Parsing proceeds as follows:

1. The state machine examines each line of input, checking each of the
   transition patterns of the state `Body`, in order, looking for a match.
   The implicit transitions (blank lines and indentation) are checked before
   any others. The 'text' transition is a catch-all (matches anything).

2. The method associated with the matched transition pattern is called.

   A. Some transition methods are self-contained, appending elements to the
      document tree (`Body.doctest` parses a doctest block). The parser's
      current line index is advanced to the end of the element, and parsing
      continues with step 1.

   B. Other transition methods trigger the creation of a nested state machine,
      whose job is to parse a compound construct ('indent' does a block quote,
      'bullet' does a bullet list, 'overline' does a section [first checking
      for a valid section header], etc.).

      - In the case of lists and explicit markup, a one-off state machine is
        created and run to parse contents of the first item.

      - A new state machine is created and its initial state is set to the
        appropriate specialized state (`BulletList` in the case of the
        'bullet' transition; see `SpecializedBody` for more detail). This
        state machine is run to parse the compound element (or series of
        explicit markup elements), and returns as soon as a non-member element
        is encountered. For example, the `BulletList` state machine ends as
        soon as it encounters an element which is not a list item of that
        bullet list. The optional omission of inter-element blank lines is
        enabled by this nested state machine.

      - The current line index is advanced to the end of the elements parsed,
        and parsing continues with step 1.

   C. The result of the 'text' transition depends on the next line of text.
      The current state is changed to `Text`, under which the second line is
      examined. If the second line is:

      - Indented: The element is a definition list item, and parsing proceeds
        similarly to step 2.B, using the `DefinitionList` state.

      - A line of uniform punctuation characters: The element is a section
        header; again, parsing proceeds as in step 2.B, and `Body` is still
        used.

      - Anything else: The element is a paragraph, which is examined for
        inline markup and appended to the parent element. Processing
        continues with step 1.
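
Usage Sketch
============

The parser is normally driven through `docutils.parsers.rst.Parser` or the
`docutils.core` convenience functions rather than by running
`RSTStateMachine` directly. A minimal sketch (the source string and the
name ``'<sketch>'`` are placeholders)::

    from docutils.frontend import get_default_settings
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    parser = Parser()
    settings = get_default_settings(Parser)
    document = new_document('<sketch>', settings)
    parser.parse('A paragraph with *inline markup*.', document)
    print(document.pformat())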
101"""
102
103from __future__ import annotations
104
105__docformat__ = 'reStructuredText'
106
107import copy
108import re
109from types import FunctionType, MethodType
110from types import SimpleNamespace as Struct
111
112from docutils import nodes, statemachine, utils
113from docutils import ApplicationError, DataError
114from docutils.statemachine import StateMachineWS, StateWS
115from docutils.nodes import fully_normalize_name as normalize_name
116from docutils.nodes import unescape, whitespace_normalize_name
117import docutils.parsers.rst
118from docutils.parsers.rst import directives, languages, tableparser, roles
119from docutils.utils import escape2null, column_width
120from docutils.utils import punctuation_chars, urischemes
121from docutils.utils import split_escaped_whitespace
122from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
123 RomanNumeral)
124
125TYPE_CHECKING = False
126if TYPE_CHECKING:
127 from docutils.statemachine import StringList
128
129
130class MarkupError(DataError): pass
131class UnknownInterpretedRoleError(DataError): pass
132class InterpretedRoleNotImplementedError(DataError): pass
133class ParserError(ApplicationError): pass
134class MarkupMismatch(Exception): pass
135
136
137class RSTStateMachine(StateMachineWS):
138
139 """
140 reStructuredText's master StateMachine.
141
142 The entry point to reStructuredText parsing is the `run()` method.
143 """
144
145 def run(self, input_lines, document, input_offset=0, match_titles=True,
146 inliner=None) -> None:
147 """
148 Parse `input_lines` and modify the `document` node in place.
149
150 Extend `StateMachineWS.run()`: set up parse-global data and
151 run the StateMachine.
152 """
153 self.language = languages.get_language(
154 document.settings.language_code, document.reporter)
155 self.match_titles = match_titles
156 if inliner is None:
157 inliner = Inliner()
158 inliner.init_customizations(document.settings)
159 # A collection of objects to share with nested parsers.
160 # The attributes `reporter`, `section_level`, and
161 # `section_bubble_up_kludge` will be removed in Docutils 2.0
162 self.memo = Struct(document=document,
163 reporter=document.reporter, # ignored
164 language=self.language,
165 title_styles=[],
166 section_level=0, # ignored
167 section_bubble_up_kludge=False, # ignored
168 inliner=inliner)
169 self.document = document
170 self.attach_observer(document.note_source)
171 self.reporter = self.document.reporter
172 self.node = document
173 results = StateMachineWS.run(self, input_lines, input_offset,
174 input_source=document['source'])
175 assert results == [], 'RSTStateMachine.run() results should be empty!'
176 self.node = self.memo = None # remove unneeded references
177
178
179class NestedStateMachine(StateMachineWS):
180 """
181 StateMachine run from within other StateMachine runs, to parse nested
182 document structures.
183 """
184
185 def run(self, input_lines, input_offset, memo, node, match_titles=True):
186 """
187 Parse `input_lines` and populate `node`.
188
189 Use a separate "title style hierarchy" (changed in Docutils 0.23).
190
191 Extend `StateMachineWS.run()`: set up document-wide data.
192 """
193 self.match_titles = match_titles
194 self.memo = copy.copy(memo)
195 self.document = memo.document
196 self.attach_observer(self.document.note_source)
197 self.language = memo.language
198 self.reporter = self.document.reporter
199 self.node = node
200 if match_titles:
201 # Use a separate section title style hierarchy;
202 # ensure all sections in the `input_lines` are treated as
203 # subsections of the current section by blocking lower
204 # section levels with a style that is impossible in rST:
205 self.memo.title_styles = ['x'] * len(node.section_hierarchy())
206 results = StateMachineWS.run(self, input_lines, input_offset)
207 assert results == [], ('NestedStateMachine.run() results should be '
208 'empty!')
209 return results
210
211
212class RSTState(StateWS):
213
214 """
215 reStructuredText State superclass.
216
217 Contains methods used by all State subclasses.
218 """
219
220 nested_sm = NestedStateMachine
221 nested_sm_cache = []
222
223 def __init__(self, state_machine, debug=False) -> None:
224 self.nested_sm_kwargs = {'state_classes': state_classes,
225 'initial_state': 'Body'}
226 StateWS.__init__(self, state_machine, debug)
227
228 def runtime_init(self) -> None:
229 StateWS.runtime_init(self)
230 memo = self.state_machine.memo
231 self.memo = memo
232 self.document = memo.document
233 self.inliner = memo.inliner
234 self.reporter = self.document.reporter
235 self.parent = self.state_machine.node
236 # enable the reporter to determine source and source-line
237 if not hasattr(self.reporter, 'get_source_and_line'):
238 self.reporter.get_source_and_line = self.state_machine.get_source_and_line # noqa:E501
239
240 def goto_line(self, abs_line_offset) -> None:
241 """
242 Jump to input line `abs_line_offset`, ignoring jumps past the end.
243 """
244 try:
245 self.state_machine.goto_line(abs_line_offset)
246 except EOFError:
247 pass
248
249 def no_match(self, context, transitions):
250 """
251 Override `StateWS.no_match` to generate a system message.
252
253 This code should never be run.
254 """
255 self.reporter.severe(
256 'Internal error: no transition pattern match. State: "%s"; '
257 'transitions: %s; context: %s; current line: %r.'
258 % (self.__class__.__name__, transitions, context,
259 self.state_machine.line))
260 return context, None, []
261
262 def bof(self, context):
263 """Called at beginning of file."""
264 return [], []
265
266 def nested_parse(self,
267 block: StringList,
268 input_offset: int,
269 node: nodes.Element,
270 match_titles: bool = False,
271 state_machine_class: StateMachineWS|None = None,
272 state_machine_kwargs: dict|None = None
273 ) -> int:
274 """
275 Parse the input `block` with a nested state-machine rooted at `node`.
276
277 :block:
278 reStructuredText source extract.
279 :input_offset:
280 Line number at start of the block.
281 :node:
282 Base node. All generated nodes will be appended to this node.
283 :match_titles:
284 Allow section titles?
285 A separate section title style hierarchy is used for the nested
286 parsing (all sections are subsections of the current section).
287 The calling code should check whether sections are valid
288 children of the base node and move them or warn otherwise.
289 :state_machine_class:
290 Default: `NestedStateMachine`.
291 :state_machine_kwargs:
292 Keyword arguments for the state-machine instantiation.
293 Default: `self.nested_sm_kwargs`.
294
295 Create a new state-machine instance if required.
296 Return new offset.
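
        A typical call, e.g. from a directive's ``run()`` method (sketch;
        ``self.content`` and ``self.content_offset`` are provided by
        `docutils.parsers.rst.Directive`)::

            container = nodes.Element()
            self.state.nested_parse(self.content, self.content_offset,
                                    container)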
297 """
298 use_default = 0
299 if state_machine_class is None:
300 state_machine_class = self.nested_sm
301 use_default += 1
302 if state_machine_kwargs is None:
303 state_machine_kwargs = self.nested_sm_kwargs
304 use_default += 1
305 state_machine = None
306 if use_default == 2:
307 try:
308 state_machine = self.nested_sm_cache.pop()
309 except IndexError:
310 pass
311 if not state_machine:
312 state_machine = state_machine_class(debug=self.debug,
313 **state_machine_kwargs)
314 # run the statemachine and populate `node`:
315 block_length = len(block)
316 state_machine.run(block, input_offset, memo=self.memo,
317 node=node, match_titles=match_titles)
318 # clean up
319 if use_default == 2:
320 self.nested_sm_cache.append(state_machine)
321 else:
322 state_machine.unlink()
323 new_offset = state_machine.abs_line_offset()
324 # No `block.parent` implies disconnected -- lines aren't in sync:
325 if block.parent and (len(block) - block_length) != 0:
326 # Adjustment for block if modified in nested parse:
327 self.state_machine.next_line(len(block) - block_length)
328 return new_offset
329
330 def nested_list_parse(self, block, input_offset, node, initial_state,
331 blank_finish,
332 blank_finish_state=None,
333 extra_settings={},
334 match_titles=False,
335 state_machine_class=None,
336 state_machine_kwargs=None):
337 """
338 Parse the input `block` with a nested state-machine rooted at `node`.
339
340 Create a new StateMachine rooted at `node` and run it over the
341 input `block` (see also `nested_parse()`).
342 Also keep track of optional intermediate blank lines and the
343 required final one.
344
345 Return new offset and a boolean indicating whether there was a
346 blank final line.
347 """
348 if state_machine_class is None:
349 state_machine_class = self.nested_sm
350 if state_machine_kwargs is None:
351 state_machine_kwargs = self.nested_sm_kwargs.copy()
352 state_machine_kwargs['initial_state'] = initial_state
353 state_machine = state_machine_class(debug=self.debug,
354 **state_machine_kwargs)
355 if blank_finish_state is None:
356 blank_finish_state = initial_state
357 state_machine.states[blank_finish_state].blank_finish = blank_finish
358 for key, value in extra_settings.items():
359 setattr(state_machine.states[initial_state], key, value)
360 state_machine.run(block, input_offset, memo=self.memo,
361 node=node, match_titles=match_titles)
362 blank_finish = state_machine.states[blank_finish_state].blank_finish
363 state_machine.unlink()
364 return state_machine.abs_line_offset(), blank_finish
365
366 def section(self, title, source, style, lineno, messages) -> None:
367 """Check for a valid subsection and create one if it checks out."""
368 if self.check_subsection(source, style, lineno):
369 self.new_subsection(title, lineno, messages)
370
371 def check_subsection(self, source, style, lineno) -> bool:
372 """
373 Check for a valid subsection header. Update section data in `memo`.
374
375 When a new section is reached that isn't a subsection of the current
376 section, set `self.parent` to the new section's parent section
377 (or the root node if the new section is a top-level section).
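
        For example, with ``title_styles == ['=', '-']``, the underline
        style ``'-'`` maps to level 2 and is accepted whenever the current
        section level is at least 1, while a new (third) style maps to
        level 3 and requires a current level of at least 2.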
378 """
379 title_styles = self.memo.title_styles
380 parent_sections = self.parent.section_hierarchy()
381 # current section level: (0 root, 1 section, 2 subsection, ...)
382 oldlevel = len(parent_sections)
383 # new section level:
384 try: # check for existing title style
385 newlevel = title_styles.index(style) + 1
386 except ValueError: # new title style
387 newlevel = len(title_styles) + 1
388 # The new level must not be deeper than an immediate child
389 # of the current level:
390 if newlevel > oldlevel + 1:
391 styles = ' '.join('/'.join(style) for style in title_styles)
392 self.parent += self.reporter.error(
393 'Inconsistent title style:'
394 f' skip from level {oldlevel} to {newlevel}.',
395 nodes.literal_block('', source),
396 nodes.paragraph('', f'Established title styles: {styles}'),
397 line=lineno)
398 return False
399 # Update parent state:
400 if newlevel > len(title_styles):
401 title_styles.append(style)
402 self.memo.section_level = newlevel
403 if newlevel <= oldlevel:
404 # new section is sibling or higher up in the section hierarchy
405 self.parent = parent_sections[newlevel-1].parent
406 return True
407
408 def title_inconsistent(self, sourcetext, lineno):
409 # Ignored. Will be removed in Docutils 2.0.
410 error = self.reporter.error(
411 'Title level inconsistent:', nodes.literal_block('', sourcetext),
412 line=lineno)
413 return error
414
415 def new_subsection(self, title, lineno, messages):
416 """Append new subsection to document tree."""
417 section_node = nodes.section()
418 self.parent += section_node
419 textnodes, title_messages = self.inline_text(title, lineno)
420 titlenode = nodes.title(title, '', *textnodes)
421 name = normalize_name(titlenode.astext())
422 section_node['names'].append(name)
423 section_node += titlenode
424 section_node += messages
425 section_node += title_messages
426 self.document.note_implicit_target(section_node, section_node)
427 # Update state:
428 self.state_machine.node = section_node
429 # Also update the ".parent" attribute in all states.
430 # This is a bit violent, but the state classes copy their .parent from
431 # state_machine.node on creation, so we need to update them. We could
432 # also remove RSTState.parent entirely and replace references to it
433 # with statemachine.node, but that might break code downstream of
434 # docutils.
435 for s in self.state_machine.states.values():
436 s.parent = section_node
437
438 def paragraph(self, lines, lineno):
439 """
440 Return a list (paragraph & messages) & a boolean: literal_block next?
441 """
442 data = '\n'.join(lines).rstrip()
443 if re.search(r'(?<!\\)(\\\\)*::$', data):
444 if len(data) == 2:
445 return [], 1
446 elif data[-3] in ' \n':
447 text = data[:-3].rstrip()
448 else:
449 text = data[:-1]
450 literalnext = 1
451 else:
452 text = data
453 literalnext = 0
454 textnodes, messages = self.inline_text(text, lineno)
455 p = nodes.paragraph(data, '', *textnodes)
456 p.source, p.line = self.state_machine.get_source_and_line(lineno)
457 return [p] + messages, literalnext
458
459 def inline_text(self, text, lineno):
460 """
461 Return 2 lists: nodes (text and inline elements), and system_messages.
462 """
463 nodes, messages = self.inliner.parse(text, lineno,
464 self.memo, self.parent)
465 return nodes, messages
466
467 def unindent_warning(self, node_name):
468 # the actual problem is one line below the current line
469 lineno = self.state_machine.abs_line_number() + 1
470 return self.reporter.warning('%s ends without a blank line; '
471 'unexpected unindent.' % node_name,
472 line=lineno)
473
474
475def build_regexp(definition, compile_patterns=True):
476 """
477 Build, compile and return a regular expression based on `definition`.
478
479 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
480 where "parts" is a list of regular expressions and/or regular
481 expression definitions to be joined into an or-group.
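
    Example (illustrative)::

        build_regexp(('enum', '', '', ['[0-9]+', '[a-z]']))
        # returns a pattern equivalent to re.compile('(?P<enum>[0-9]+|[a-z])')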
482 """
483 name, prefix, suffix, parts = definition
484 part_strings = []
485 for part in parts:
486 if isinstance(part, tuple):
487 part_strings.append(build_regexp(part, None))
488 else:
489 part_strings.append(part)
490 or_group = '|'.join(part_strings)
491 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
492 if compile_patterns:
493 return re.compile(regexp)
494 else:
495 return regexp
496
497
498class Inliner:
499
500 """
501 Parse inline markup; call the `parse()` method.
502 """
503
504 def __init__(self) -> None:
505 self.implicit_dispatch = []
506 """List of (pattern, bound method) tuples, used by
507 `self.implicit_inline`."""
508
509 def init_customizations(self, settings) -> None:
510 # lookahead and look-behind expressions for inline markup rules
511 if getattr(settings, 'character_level_inline_markup', False):
512 start_string_prefix = '(^|(?<!\x00))'
513 end_string_suffix = ''
514 else:
515 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
516 (punctuation_chars.openers,
517 punctuation_chars.delimiters))
518 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
519 (punctuation_chars.closing_delimiters,
520 punctuation_chars.delimiters,
521 punctuation_chars.closers))
522 args = locals().copy()
523 args.update(vars(self.__class__))
524
525 parts = ('initial_inline', start_string_prefix, '',
526 [
527 ('start', '', self.non_whitespace_after, # simple start-strings
528 [r'\*\*', # strong
529 r'\*(?!\*)', # emphasis but not strong
530 r'``', # literal
531 r'_`', # inline internal target
532 r'\|(?!\|)'] # substitution reference
533 ),
534 ('whole', '', end_string_suffix, # whole constructs
535 [ # reference name & end-string
536 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
537 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
538 [r'[0-9]+', # manually numbered
539 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)
540 r'\*', # auto-symbol
541 r'(?P<citationlabel>%s)' % self.simplename, # citation ref
542 ]
543 )
544 ]
545 ),
546 ('backquote', # interpreted text or phrase reference
547 '(?P<role>(:%s:)?)' % self.simplename, # optional role
548 self.non_whitespace_after,
549 ['`(?!`)'] # but not literal
550 )
551 ]
552 )
553 self.start_string_prefix = start_string_prefix
554 self.end_string_suffix = end_string_suffix
555 self.parts = parts
556
557 self.patterns = Struct(
558 initial=build_regexp(parts),
559 emphasis=re.compile(self.non_whitespace_escape_before
560 + r'(\*)' + end_string_suffix),
561 strong=re.compile(self.non_whitespace_escape_before
562 + r'(\*\*)' + end_string_suffix),
563 interpreted_or_phrase_ref=re.compile(
564 r"""
565 %(non_unescaped_whitespace_escape_before)s
566 (
567 `
568 (?P<suffix>
569 (?P<role>:%(simplename)s:)?
570 (?P<refend>__?)?
571 )
572 )
573 %(end_string_suffix)s
574 """ % args, re.VERBOSE),
575 embedded_link=re.compile(
576 r"""
577 (
578 (?:[ \n]+|^) # spaces or beginning of line/string
579 < # open bracket
580 %(non_whitespace_after)s
581 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
582 %(non_whitespace_escape_before)s
583 > # close bracket
584 )
585 $ # end of string
586 """ % args, re.VERBOSE),
587 literal=re.compile(self.non_whitespace_before + '(``)'
588 + end_string_suffix),
589 target=re.compile(self.non_whitespace_escape_before
590 + r'(`)' + end_string_suffix),
591 substitution_ref=re.compile(self.non_whitespace_escape_before
592 + r'(\|_{0,2})'
593 + end_string_suffix),
594 email=re.compile(self.email_pattern % args + '$',
595 re.VERBOSE),
596 uri=re.compile(
597 (r"""
598 %(start_string_prefix)s
599 (?P<whole>
600 (?P<absolute> # absolute URI
601 (?P<scheme> # scheme (http, ftp, mailto)
602 [a-zA-Z][a-zA-Z0-9.+-]*
603 )
604 :
605 (
606 ( # either:
607 (//?)? # hierarchical URI
608 %(uric)s* # URI characters
609 %(uri_end)s # final URI char
610 )
611 ( # optional query
612 \?%(uric)s*
613 %(uri_end)s
614 )?
615 ( # optional fragment
616 \#%(uric)s*
617 %(uri_end)s
618 )?
619 )
620 )
621 | # *OR*
622 (?P<email> # email address
623 """ + self.email_pattern + r"""
624 )
625 )
626 %(end_string_suffix)s
627 """) % args, re.VERBOSE),
628 pep=re.compile(
629 r"""
630 %(start_string_prefix)s
631 (
632 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
633 |
634 (PEP\s+(?P<pepnum2>\d+)) # reference by name
635 )
636 %(end_string_suffix)s""" % args, re.VERBOSE),
637 rfc=re.compile(
638 r"""
639 %(start_string_prefix)s
640 (RFC(-|\s+)?(?P<rfcnum>\d+))
641 %(end_string_suffix)s""" % args, re.VERBOSE))
642
643 self.implicit_dispatch.append((self.patterns.uri,
644 self.standalone_uri))
645 if settings.pep_references:
646 self.implicit_dispatch.append((self.patterns.pep,
647 self.pep_reference))
648 if settings.rfc_references:
649 self.implicit_dispatch.append((self.patterns.rfc,
650 self.rfc_reference))
651
652 def parse(self, text, lineno, memo, parent):
653 # Needs to be refactored for nested inline markup.
654 # Add nested_parse() method?
655 """
656 Return 2 lists: nodes (text and inline elements), and system_messages.
657
658 Using `self.patterns.initial`, a pattern which matches start-strings
659 (emphasis, strong, interpreted, phrase reference, literal,
660 substitution reference, and inline target) and complete constructs
661 (simple reference, footnote reference), search for a candidate. When
662 one is found, check for validity (e.g., not a quoted '*' character).
663 If valid, search for the corresponding end string if applicable, and
664 check it for validity. If not found or invalid, generate a warning
665 and ignore the start-string. Implicit inline markup (e.g. standalone
666 URIs) is found last.
667
668 :text: source string
669 :lineno: absolute line number, cf. `statemachine.get_source_and_line()`
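
        A standalone call might look like this (sketch; ``memo`` and
        ``parent`` are normally supplied by the calling `RSTState`)::

            nodes_, messages = inliner.parse('*emphasis* and a ref_',
                                             lineno, memo, parent)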
670 """
671 self.document = memo.document
672 self.language = memo.language
673 self.reporter = self.document.reporter
674 self.parent = parent
675 pattern_search = self.patterns.initial.search
676 dispatch = self.dispatch
677 remaining = escape2null(text)
678 processed = []
679 unprocessed = []
680 messages = []
681 while remaining:
682 match = pattern_search(remaining)
683 if match:
684 groups = match.groupdict()
685 method = dispatch[groups['start'] or groups['backquote']
686 or groups['refend'] or groups['fnend']]
687 before, inlines, remaining, sysmessages = method(self, match,
688 lineno)
689 unprocessed.append(before)
690 messages += sysmessages
691 if inlines:
692 processed += self.implicit_inline(''.join(unprocessed),
693 lineno)
694 processed += inlines
695 unprocessed = []
696 else:
697 break
698 remaining = ''.join(unprocessed) + remaining
699 if remaining:
700 processed += self.implicit_inline(remaining, lineno)
701 return processed, messages
702
703 # Inline object recognition
704 # -------------------------
705 # See also init_customizations().
706 non_whitespace_before = r'(?<!\s)'
707 non_whitespace_escape_before = r'(?<![\s\x00])'
708 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
709 non_whitespace_after = r'(?!\s)'
710 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
711 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
712 # Valid URI characters (see RFC 2396 & RFC 2732);
713 # final \x00 allows backslash escapes in URIs:
714 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
715 # Delimiter indicating the end of a URI (not part of the URI):
716 uri_end_delim = r"""[>]"""
717 # Last URI character; same as uric but no punctuation:
718 urilast = r"""[_~*/=+a-zA-Z0-9]"""
719 # End of a URI (either 'urilast' or 'uric followed by a
720 # uri_end_delim'):
721 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
722 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
723 email_pattern = r"""
724 %(emailc)s+(?:\.%(emailc)s+)* # name
725 (?<!\x00)@ # at
726 %(emailc)s+(?:\.%(emailc)s*)* # host
727 %(uri_end)s # final URI char
728 """
729
730 def quoted_start(self, match):
731 """Test if inline markup start-string is 'quoted'.
732
733 'Quoted' in this context means the start-string is enclosed in a pair
734 of matching opening/closing delimiters (not necessarily quotes)
735 or at the end of the match.
736 """
737 string = match.string
738 start = match.start()
739 if start == 0: # start-string at beginning of text
740 return False
741 prestart = string[start - 1]
742 try:
743 poststart = string[match.end()]
744 except IndexError: # start-string at end of text
745 return True # not "quoted" but no markup start-string either
746 return punctuation_chars.match_chars(prestart, poststart)
747
748 def inline_obj(self, match, lineno, end_pattern, nodeclass,
749 restore_backslashes=False):
750 string = match.string
751 matchstart = match.start('start')
752 matchend = match.end('start')
753 if self.quoted_start(match):
754 return string[:matchend], [], string[matchend:], [], ''
755 endmatch = end_pattern.search(string[matchend:])
756 if endmatch and endmatch.start(1): # 1 or more chars
757 text = endmatch.string[:endmatch.start(1)]
758 if restore_backslashes:
759 text = unescape(text, True)
760 textend = matchend + endmatch.end(1)
761 rawsource = unescape(string[matchstart:textend], True)
762 node = nodeclass(rawsource, text)
763 return (string[:matchstart], [node],
764 string[textend:], [], endmatch.group(1))
765 msg = self.reporter.warning(
766 'Inline %s start-string without end-string.'
767 % nodeclass.__name__, line=lineno)
768 text = unescape(string[matchstart:matchend], True)
769 prb = self.problematic(text, text, msg)
770 return string[:matchstart], [prb], string[matchend:], [msg], ''
771
772 def problematic(self, text, rawsource, message):
773 msgid = self.document.set_id(message, self.parent)
774 problematic = nodes.problematic(rawsource, text, refid=msgid)
775 prbid = self.document.set_id(problematic)
776 message.add_backref(prbid)
777 return problematic
778
779 def emphasis(self, match, lineno):
780 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
781 match, lineno, self.patterns.emphasis, nodes.emphasis)
782 return before, inlines, remaining, sysmessages
783
784 def strong(self, match, lineno):
785 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
786 match, lineno, self.patterns.strong, nodes.strong)
787 return before, inlines, remaining, sysmessages
788
789 def interpreted_or_phrase_ref(self, match, lineno):
790 end_pattern = self.patterns.interpreted_or_phrase_ref
791 string = match.string
792 matchstart = match.start('backquote')
793 matchend = match.end('backquote')
794 rolestart = match.start('role')
795 role = match.group('role')
796 position = ''
797 if role:
798 role = role[1:-1]
799 position = 'prefix'
800 elif self.quoted_start(match):
801 return string[:matchend], [], string[matchend:], []
802 endmatch = end_pattern.search(string[matchend:])
803 if endmatch and endmatch.start(1): # 1 or more chars
804 textend = matchend + endmatch.end()
805 if endmatch.group('role'):
806 if role:
807 msg = self.reporter.warning(
808 'Multiple roles in interpreted text (both '
809 'prefix and suffix present; only one allowed).',
810 line=lineno)
811 text = unescape(string[rolestart:textend], True)
812 prb = self.problematic(text, text, msg)
813 return string[:rolestart], [prb], string[textend:], [msg]
814 role = endmatch.group('suffix')[1:-1]
815 position = 'suffix'
816 escaped = endmatch.string[:endmatch.start(1)]
817 rawsource = unescape(string[matchstart:textend], True)
818 if rawsource[-1:] == '_':
819 if role:
820 msg = self.reporter.warning(
821 'Mismatch: both interpreted text role %s and '
822 'reference suffix.' % position, line=lineno)
823 text = unescape(string[rolestart:textend], True)
824 prb = self.problematic(text, text, msg)
825 return string[:rolestart], [prb], string[textend:], [msg]
826 return self.phrase_ref(string[:matchstart], string[textend:],
827 rawsource, escaped)
828 else:
829 rawsource = unescape(string[rolestart:textend], True)
830 nodelist, messages = self.interpreted(rawsource, escaped, role,
831 lineno)
832 return (string[:rolestart], nodelist,
833 string[textend:], messages)
834 msg = self.reporter.warning(
835 'Inline interpreted text or phrase reference start-string '
836 'without end-string.', line=lineno)
837 text = unescape(string[matchstart:matchend], True)
838 prb = self.problematic(text, text, msg)
839 return string[:matchstart], [prb], string[matchend:], [msg]
840
841 def phrase_ref(self, before, after, rawsource, escaped, text=None):
842 # `text` is ignored (since 0.16)
843 match = self.patterns.embedded_link.search(escaped)
844 if match: # embedded <URI> or <alias_>
845 text = escaped[:match.start(0)]
846 unescaped = unescape(text)
847 rawtext = unescape(text, True)
848 aliastext = match.group(2)
849 rawaliastext = unescape(aliastext, True)
850 underscore_escaped = rawaliastext.endswith(r'\_')
851 if (aliastext.endswith('_')
852 and not (underscore_escaped
853 or self.patterns.uri.match(aliastext))):
854 aliastype = 'name'
855 alias = normalize_name(unescape(aliastext[:-1]))
856 target = nodes.target(match.group(1), refname=alias)
857 target.indirect_reference_name = whitespace_normalize_name(
858 unescape(aliastext[:-1]))
859 else:
860 aliastype = 'uri'
861 # remove unescaped whitespace
862 alias_parts = split_escaped_whitespace(match.group(2))
863 alias = ' '.join(''.join(part.split())
864 for part in alias_parts)
865 alias = self.adjust_uri(unescape(alias))
866 if alias.endswith(r'\_'):
867 alias = alias[:-2] + '_'
868 target = nodes.target(match.group(1), refuri=alias)
869 target.referenced = 1
870 if not aliastext:
871 raise ApplicationError('problem with embedded link: %r'
872 % aliastext)
873 if not text:
874 text = alias
875 unescaped = unescape(text)
876 rawtext = rawaliastext
877 else:
878 text = escaped
879 unescaped = unescape(text)
880 target = None
881 rawtext = unescape(escaped, True)
882
883 refname = normalize_name(unescaped)
884 reference = nodes.reference(rawsource, text,
885 name=whitespace_normalize_name(unescaped))
886 reference[0].rawsource = rawtext
887
888 node_list = [reference]
889
890 if rawsource[-2:] == '__':
891 if target and (aliastype == 'name'):
892 reference['refname'] = alias
893 self.document.note_refname(reference)
894 # self.document.note_indirect_target(target) # required?
895 elif target and (aliastype == 'uri'):
896 reference['refuri'] = alias
897 else:
898 reference['anonymous'] = True
899 else:
900 if target:
901 target['names'].append(refname)
902 if aliastype == 'name':
903 reference['refname'] = alias
904 self.document.note_indirect_target(target)
905 self.document.note_refname(reference)
906 else:
907 reference['refuri'] = alias
908 # target.note_referenced_by(name=refname)
909 self.document.note_implicit_target(target, self.parent)
910 node_list.append(target)
911 else:
912 reference['refname'] = refname
913 self.document.note_refname(reference)
914 return before, node_list, after, []
915
916 def adjust_uri(self, uri):
917 match = self.patterns.email.match(uri)
918 if match:
919 return 'mailto:' + uri
920 else:
921 return uri
922
923 def interpreted(self, rawsource, text, role, lineno):
924 role_fn, messages = roles.role(role, self.language, lineno,
925 self.reporter)
926 if role_fn:
927 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
928 return nodes, messages + messages2
929 else:
930 msg = self.reporter.error(
931 'Unknown interpreted text role "%s".' % role,
932 line=lineno)
933 return ([self.problematic(rawsource, rawsource, msg)],
934 messages + [msg])
935
936 def literal(self, match, lineno):
937 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
938 match, lineno, self.patterns.literal, nodes.literal,
939 restore_backslashes=True)
940 return before, inlines, remaining, sysmessages
941
942 def inline_internal_target(self, match, lineno):
943 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
944 match, lineno, self.patterns.target, nodes.target)
945 if inlines and isinstance(inlines[0], nodes.target):
946 assert len(inlines) == 1
947 target = inlines[0]
948 name = normalize_name(target.astext())
949 target['names'].append(name)
950 self.document.note_explicit_target(target, self.parent)
951 return before, inlines, remaining, sysmessages
952
953 def substitution_reference(self, match, lineno):
954 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
955 match, lineno, self.patterns.substitution_ref,
956 nodes.substitution_reference)
957 if len(inlines) == 1:
958 subref_node = inlines[0]
959 if isinstance(subref_node, nodes.substitution_reference):
960 subref_text = subref_node.astext()
961 self.document.note_substitution_ref(subref_node, subref_text)
962 if endstring[-1:] == '_':
963 reference_node = nodes.reference(
964 '|%s%s' % (subref_text, endstring), '')
965 if endstring[-2:] == '__':
966 reference_node['anonymous'] = True
967 else:
968 reference_node['refname'] = normalize_name(subref_text)
969 self.document.note_refname(reference_node)
970 reference_node += subref_node
971 inlines = [reference_node]
972 return before, inlines, remaining, sysmessages
973
974 def footnote_reference(self, match, lineno):
975 """
976 Handles `nodes.footnote_reference` and `nodes.citation_reference`
977 elements.
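
        Matched source forms include ``[1]_``, ``[#]_``, ``[#label]_``,
        ``[*]_``, and ``[CIT2002]_`` (a citation reference).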
978 """
979 label = match.group('footnotelabel')
980 refname = normalize_name(label)
981 string = match.string
982 before = string[:match.start('whole')]
983 remaining = string[match.end('whole'):]
984 if match.group('citationlabel'):
985 refnode = nodes.citation_reference('[%s]_' % label,
986 refname=refname)
987 refnode += nodes.Text(label)
988 self.document.note_citation_ref(refnode)
989 else:
990 refnode = nodes.footnote_reference('[%s]_' % label)
991 if refname[0] == '#':
992 refname = refname[1:]
993 refnode['auto'] = 1
994 self.document.note_autofootnote_ref(refnode)
995 elif refname == '*':
996 refname = ''
997 refnode['auto'] = '*'
998 self.document.note_symbol_footnote_ref(
999 refnode)
1000 else:
1001 refnode += nodes.Text(label)
1002 if refname:
1003 refnode['refname'] = refname
1004 self.document.note_footnote_ref(refnode)
1005 if utils.get_trim_footnote_ref_space(self.document.settings):
1006 before = before.rstrip()
1007 return before, [refnode], remaining, []
1008
1009 def reference(self, match, lineno, anonymous=False):
1010 referencename = match.group('refname')
1011 refname = normalize_name(referencename)
1012 referencenode = nodes.reference(
1013 referencename + match.group('refend'), referencename,
1014 name=whitespace_normalize_name(referencename))
1015 referencenode[0].rawsource = referencename
1016 if anonymous:
1017 referencenode['anonymous'] = True
1018 else:
1019 referencenode['refname'] = refname
1020 self.document.note_refname(referencenode)
1021 string = match.string
1022 matchstart = match.start('whole')
1023 matchend = match.end('whole')
1024 return string[:matchstart], [referencenode], string[matchend:], []
1025
1026 def anonymous_reference(self, match, lineno):
1027 return self.reference(match, lineno, anonymous=True)
1028
1029 def standalone_uri(self, match, lineno):
1030 if (not match.group('scheme')
1031 or match.group('scheme').lower() in urischemes.schemes):
1032 if match.group('email'):
1033 addscheme = 'mailto:'
1034 else:
1035 addscheme = ''
1036 text = match.group('whole')
1037 refuri = addscheme + unescape(text)
1038 reference = nodes.reference(unescape(text, True), text,
1039 refuri=refuri)
1040 return [reference]
1041 else: # not a valid scheme
1042 raise MarkupMismatch
1043
1044 def pep_reference(self, match, lineno):
1045 text = match.group(0)
1046 if text.startswith('pep-'):
1047 pepnum = int(unescape(match.group('pepnum1')))
1048 elif text.startswith('PEP'):
1049 pepnum = int(unescape(match.group('pepnum2')))
1050 else:
1051 raise MarkupMismatch
1052 ref = (self.document.settings.pep_base_url
1053 + self.document.settings.pep_file_url_template % pepnum)
1054 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1055
1056 rfc_url = 'rfc%d.html'
1057
1058 def rfc_reference(self, match, lineno):
1059 text = match.group(0)
1060 if text.startswith('RFC'):
1061 rfcnum = int(unescape(match.group('rfcnum')))
1062 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
1063 else:
1064 raise MarkupMismatch
1065 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1066
1067 def implicit_inline(self, text, lineno):
1068 """
1069 Check each of the patterns in `self.implicit_dispatch` for a match,
1070 and dispatch to the stored method for the pattern. Recursively check
1071 the text before and after the match. Return a list of `nodes.Text`
1072 and inline element nodes.
1073 """
1074 if not text:
1075 return []
1076 for pattern, method in self.implicit_dispatch:
1077 match = pattern.search(text)
1078 if match:
1079 try:
1080 # Must recurse on strings before *and* after the match;
1081 # there may be multiple patterns.
1082 return (self.implicit_inline(text[:match.start()], lineno)
1083 + method(match, lineno)
1084 + self.implicit_inline(text[match.end():], lineno))
1085 except MarkupMismatch:
1086 pass
1087 return [nodes.Text(text)]
1088
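    # Map each inline markup start-string (or reference/footnote end-string)
    # matched by `patterns.initial` to its handler method (see `parse()`).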
1089 dispatch = {'*': emphasis,
1090 '**': strong,
1091 '`': interpreted_or_phrase_ref,
1092 '``': literal,
1093 '_`': inline_internal_target,
1094 ']_': footnote_reference,
1095 '|': substitution_reference,
1096 '_': reference,
1097 '__': anonymous_reference}
1098
1099
1100def _loweralpha_to_int(s, _zero=(ord('a')-1)):
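    # Ordinal value of a single letter: 'a' -> 1, 'b' -> 2, ..., 'z' -> 26.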
1101 return ord(s) - _zero
1102
1103
1104def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1105 return ord(s) - _zero
1106
1107
1108class Body(RSTState):
1109
1110 """
1111 Generic classifier of the first line of a block.
1112 """
1113
1114 double_width_pad_char = tableparser.TableParser.double_width_pad_char
1115 """Padding character for East Asian double-width text."""
1116
1117 enum = Struct()
1118 """Enumerated list parsing information."""
1119
1120 enum.formatinfo = {
1121 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1122 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1123 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1124 enum.formats = enum.formatinfo.keys()
1125 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1126 'lowerroman', 'upperroman'] # ORDERED!
1127 enum.sequencepats = {'arabic': '[0-9]+',
1128 'loweralpha': '[a-z]',
1129 'upperalpha': '[A-Z]',
1130 'lowerroman': '[ivxlcdm]+',
1131 'upperroman': '[IVXLCDM]+'}
1132 enum.converters = {'arabic': int,
1133 'loweralpha': _loweralpha_to_int,
1134 'upperalpha': _upperalpha_to_int,
1135 'lowerroman': RomanNumeral.from_string,
1136 'upperroman': RomanNumeral.from_string}
1137
1138 enum.sequenceregexps = {}
1139 for sequence in enum.sequences:
1140 enum.sequenceregexps[sequence] = re.compile(
1141 enum.sequencepats[sequence] + '$')
1142
1143 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1144 """Matches the top (& bottom) of a full table)."""
1145
1146 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1147 """Matches the top of a simple table."""
1148
1149 simple_table_border_pat = re.compile('=+[ =]*$')
1150 """Matches the bottom & header bottom of a simple table."""
1151
1152 pats = {}
1153 """Fragments of patterns used by transitions."""
1154
1155 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1156 pats['alpha'] = '[a-zA-Z]'
1157 pats['alphanum'] = '[a-zA-Z0-9]'
1158 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1159 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1160 '|%(upperroman)s|#)' % enum.sequencepats)
1161 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1162 # @@@ Loosen up the pattern? Allow Unicode?
1163 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1164 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1165 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1166 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1167
1168 for format in enum.formats:
1169 pats[format] = '(?P<%s>%s%s%s)' % (
1170 format, re.escape(enum.formatinfo[format].prefix),
1171 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1172
1173 patterns = {
1174 'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
1175 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1176 'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
1177 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1178 'doctest': r'>>>( +|$)',
1179 'line_block': r'\|( +|$)',
1180 'grid_table_top': grid_table_top_pat,
1181 'simple_table_top': simple_table_top_pat,
1182 'explicit_markup': r'\.\.( +|$)',
1183 'anonymous': r'__( +|$)',
1184 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1185 'text': r''}
1186 initial_transitions = (
1187 'bullet',
1188 'enumerator',
1189 'field_marker',
1190 'option_marker',
1191 'doctest',
1192 'line_block',
1193 'grid_table_top',
1194 'simple_table_top',
1195 'explicit_markup',
1196 'anonymous',
1197 'line',
1198 'text')
1199
1200 def indent(self, match, context, next_state):
1201 """Block quote."""
1202 (indented, indent, line_offset, blank_finish
1203 ) = self.state_machine.get_indented()
1204 elements = self.block_quote(indented, line_offset)
1205 self.parent += elements
1206 if not blank_finish:
1207 self.parent += self.unindent_warning('Block quote')
1208 return context, next_state, []
1209
1210 def block_quote(self, indented, line_offset):
1211 elements = []
1212 while indented:
1213 blockquote = nodes.block_quote(rawsource='\n'.join(indented))
1214 (blockquote.source, blockquote.line
1215 ) = self.state_machine.get_source_and_line(line_offset+1)
1216 (blockquote_lines,
1217 attribution_lines,
1218 attribution_offset,
1219 indented,
1220 new_line_offset) = self.split_attribution(indented, line_offset)
1221 self.nested_parse(blockquote_lines, line_offset, blockquote)
1222 elements.append(blockquote)
1223 if attribution_lines:
1224 attribution, messages = self.parse_attribution(
1225 attribution_lines, line_offset+attribution_offset)
1226 blockquote += attribution
1227 elements += messages
1228 line_offset = new_line_offset
1229 while indented and not indented[0]:
1230 indented = indented[1:]
1231 line_offset += 1
1232 return elements
1233
1234 # U+2014 is an em-dash:
1235 attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
1236
1237 def split_attribution(self, indented, line_offset):
1238 """
1239 Check for a block quote attribution and split it off:
1240
1241 * First line after a blank line must begin with a dash ("--", "---",
1242 em-dash; matches `self.attribution_pattern`).
1243 * Every line after that must have consistent indentation.
1244 * Attributions must be preceded by block quote content.
1245
1246 Return a tuple of: (block quote content lines, attribution lines,
1247 attribution offset, remaining indented lines, remaining lines offset).
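
        Example source (block quote content; the last line is split off as
        the attribution)::

            This is the quoted text.

            -- An Attribution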
1248 """
1249 blank = None
1250 nonblank_seen = False
1251 for i in range(len(indented)):
1252 line = indented[i].rstrip()
1253 if line:
1254 if nonblank_seen and blank == i - 1: # last line blank
1255 match = self.attribution_pattern.match(line)
1256 if match:
1257 attribution_end, indent = self.check_attribution(
1258 indented, i)
1259 if attribution_end:
1260 a_lines = indented[i:attribution_end]
1261 a_lines.trim_left(match.end(), end=1)
1262 a_lines.trim_left(indent, start=1)
1263 return (indented[:i], a_lines,
1264 i, indented[attribution_end:],
1265 line_offset + attribution_end)
1266 nonblank_seen = True
1267 else:
1268 blank = i
1269 else:
1270 return indented, None, None, None, None
1271
1272 def check_attribution(self, indented, attribution_start):
1273 """
1274 Check attribution shape.
1275 Return the index past the end of the attribution, and the indent.
1276 """
1277 indent = None
1278 i = attribution_start + 1
1279 for i in range(attribution_start + 1, len(indented)):
1280 line = indented[i].rstrip()
1281 if not line:
1282 break
1283 if indent is None:
1284 indent = len(line) - len(line.lstrip())
1285 elif len(line) - len(line.lstrip()) != indent:
1286 return None, None # bad shape; not an attribution
1287 else:
1288 # return index of line after last attribution line:
1289 i += 1
1290 return i, (indent or 0)
1291
1292 def parse_attribution(self, indented, line_offset):
1293 text = '\n'.join(indented).rstrip()
1294 lineno = 1 + line_offset # line_offset is zero-based
1295 textnodes, messages = self.inline_text(text, lineno)
1296 node = nodes.attribution(text, '', *textnodes)
1297 node.source, node.line = self.state_machine.get_source_and_line(lineno)
1298 return node, messages
1299
1300 def bullet(self, match, context, next_state):
1301 """Bullet list item."""
1302 ul = nodes.bullet_list()
1303 ul.source, ul.line = self.state_machine.get_source_and_line()
1304 self.parent += ul
1305 ul['bullet'] = match.string[0]
1306 i, blank_finish = self.list_item(match.end())
1307 ul += i
1308 offset = self.state_machine.line_offset + 1 # next line
1309 new_line_offset, blank_finish = self.nested_list_parse(
1310 self.state_machine.input_lines[offset:],
1311 input_offset=self.state_machine.abs_line_offset() + 1,
1312 node=ul, initial_state='BulletList',
1313 blank_finish=blank_finish)
1314 self.goto_line(new_line_offset)
1315 if not blank_finish:
1316 self.parent += self.unindent_warning('Bullet list')
1317 return [], next_state, []
1318
1319 def list_item(self, indent):
1320 src, srcline = self.state_machine.get_source_and_line()
1321 if self.state_machine.line[indent:]:
1322 indented, line_offset, blank_finish = (
1323 self.state_machine.get_known_indented(indent))
1324 else:
1325 indented, indent, line_offset, blank_finish = (
1326 self.state_machine.get_first_known_indented(indent))
1327 listitem = nodes.list_item('\n'.join(indented))
1328 listitem.source, listitem.line = src, srcline
1329 if indented:
1330 self.nested_parse(indented, input_offset=line_offset,
1331 node=listitem)
1332 return listitem, blank_finish
1333
1334 def enumerator(self, match, context, next_state):
1335 """Enumerated List Item"""
1336 format, sequence, text, ordinal = self.parse_enumerator(match)
1337 if not self.is_enumerated_list_item(ordinal, sequence, format):
1338 raise statemachine.TransitionCorrection('text')
1339 enumlist = nodes.enumerated_list()
1340 (enumlist.source,
1341 enumlist.line) = self.state_machine.get_source_and_line()
1342 self.parent += enumlist
1343 if sequence == '#':
1344 enumlist['enumtype'] = 'arabic'
1345 else:
1346 enumlist['enumtype'] = sequence
1347 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1348 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1349 if ordinal != 1:
1350 enumlist['start'] = ordinal
1351 msg = self.reporter.info(
1352 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1353 % (text, ordinal), base_node=enumlist)
1354 self.parent += msg
1355 listitem, blank_finish = self.list_item(match.end())
1356 enumlist += listitem
1357 offset = self.state_machine.line_offset + 1 # next line
1358 newline_offset, blank_finish = self.nested_list_parse(
1359 self.state_machine.input_lines[offset:],
1360 input_offset=self.state_machine.abs_line_offset() + 1,
1361 node=enumlist, initial_state='EnumeratedList',
1362 blank_finish=blank_finish,
1363 extra_settings={'lastordinal': ordinal,
1364 'format': format,
1365 'auto': sequence == '#'})
1366 self.goto_line(newline_offset)
1367 if not blank_finish:
1368 self.parent += self.unindent_warning('Enumerated list')
1369 return [], next_state, []
1370
1371 def parse_enumerator(self, match, expected_sequence=None):
1372 """
1373 Analyze an enumerator and return the results.
1374
1375 :Return:
1376 - the enumerator format ('period', 'parens', or 'rparen'),
1377 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1378 - the text of the enumerator, stripped of formatting, and
1379 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1380 ``None`` is returned for invalid enumerator text).
1381
1382 The enumerator format has already been determined by the regular
1383 expression match. If `expected_sequence` is given, that sequence is
1384 tried first. If not, we check for Roman numeral 1. This way,
1385 single-character Roman numerals (which are also alphabetical) can be
1386 matched. If no sequence has been matched, all sequences are checked in
1387 order.
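
        For example, the enumerator ``(ii)`` yields
        ``('parens', 'lowerroman', 'ii', 2)``.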
1388 """
1389 groupdict = match.groupdict()
1390 sequence = ''
1391 for format in self.enum.formats:
1392 if groupdict[format]: # was this the format matched?
1393 break # yes; keep `format`
1394 else: # shouldn't happen
1395 raise ParserError('enumerator format not matched')
1396 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1397 : self.enum.formatinfo[format].end]
1398 if text == '#':
1399 sequence = '#'
1400 elif expected_sequence:
1401 try:
1402 if self.enum.sequenceregexps[expected_sequence].match(text):
1403 sequence = expected_sequence
1404 except KeyError: # shouldn't happen
1405 raise ParserError('unknown enumerator sequence: %s'
1406 % sequence)
1407 elif text == 'i':
1408 sequence = 'lowerroman'
1409 elif text == 'I':
1410 sequence = 'upperroman'
1411 if not sequence:
1412 for sequence in self.enum.sequences:
1413 if self.enum.sequenceregexps[sequence].match(text):
1414 break
1415 else: # shouldn't happen
1416 raise ParserError('enumerator sequence not matched')
1417 if sequence == '#':
1418 ordinal = 1
1419 else:
1420 try:
1421 ordinal = int(self.enum.converters[sequence](text))
1422 except InvalidRomanNumeralError:
1423 ordinal = None
1424 return format, sequence, text, ordinal
1425
1426 def is_enumerated_list_item(self, ordinal, sequence, format):
1427 """
1428 Check validity based on the ordinal value and the second line.
1429
1430 Return true if the ordinal is valid and the second line is blank,
1431 indented, or starts with the next enumerator or an auto-enumerator.
1432 """
1433 if ordinal is None:
1434 return None
1435 try:
1436 next_line = self.state_machine.next_line()
1437 except EOFError: # end of input lines
1438 self.state_machine.previous_line()
1439 return 1
1440 else:
1441 self.state_machine.previous_line()
1442 if not next_line[:1].strip(): # blank or indented
1443 return 1
1444 result = self.make_enumerator(ordinal + 1, sequence, format)
1445 if result:
1446 next_enumerator, auto_enumerator = result
1447 try:
1448 if next_line.startswith((next_enumerator, auto_enumerator)):
1449 return 1
1450 except TypeError:
1451 pass
1452 return None
1453
1454 def make_enumerator(self, ordinal, sequence, format):
1455 """
1456 Construct and return the next enumerated list item marker, and an
1457 auto-enumerator ("#" instead of the regular enumerator).
1458
1459 Return ``None`` for invalid (out of range) ordinals.
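
        For example, ``make_enumerator(3, 'loweralpha', 'rparen')`` returns
        ``('c) ', '#) ')``.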
1460 """
1461 if sequence == '#':
1462 enumerator = '#'
1463 elif sequence == 'arabic':
1464 enumerator = str(ordinal)
1465 else:
1466 if sequence.endswith('alpha'):
1467 if ordinal > 26:
1468 return None
1469 enumerator = chr(ordinal + ord('a') - 1)
1470 elif sequence.endswith('roman'):
1471 try:
1472 enumerator = RomanNumeral(ordinal).to_uppercase()
1473 except TypeError:
1474 return None
1475 else: # shouldn't happen
1476 raise ParserError('unknown enumerator sequence: "%s"'
1477 % sequence)
1478 if sequence.startswith('lower'):
1479 enumerator = enumerator.lower()
1480 elif sequence.startswith('upper'):
1481 enumerator = enumerator.upper()
1482 else: # shouldn't happen
1483 raise ParserError('unknown enumerator sequence: "%s"'
1484 % sequence)
1485 formatinfo = self.enum.formatinfo[format]
1486 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1487 + ' ')
1488 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1489 return next_enumerator, auto_enumerator
1490
1491 def field_marker(self, match, context, next_state):
1492 """Field list item."""
1493 field_list = nodes.field_list()
1494 self.parent += field_list
1495 field, blank_finish = self.field(match)
1496 field_list += field
1497 offset = self.state_machine.line_offset + 1 # next line
1498 newline_offset, blank_finish = self.nested_list_parse(
1499 self.state_machine.input_lines[offset:],
1500 input_offset=self.state_machine.abs_line_offset() + 1,
1501 node=field_list, initial_state='FieldList',
1502 blank_finish=blank_finish)
1503 self.goto_line(newline_offset)
1504 if not blank_finish:
1505 self.parent += self.unindent_warning('Field list')
1506 return [], next_state, []
1507
1508 def field(self, match):
1509 name = self.parse_field_marker(match)
1510 src, srcline = self.state_machine.get_source_and_line()
1511 lineno = self.state_machine.abs_line_number()
1512 (indented, indent, line_offset, blank_finish
1513 ) = self.state_machine.get_first_known_indented(match.end())
1514 field_node = nodes.field()
1515 field_node.source = src
1516 field_node.line = srcline
1517 name_nodes, name_messages = self.inline_text(name, lineno)
1518 field_node += nodes.field_name(name, '', *name_nodes)
1519 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1520 field_node += field_body
1521 if indented:
1522 self.parse_field_body(indented, line_offset, field_body)
1523 return field_node, blank_finish
1524
1525 def parse_field_marker(self, match):
1526 """Extract & return field name from a field marker match."""
1527 field = match.group()[1:] # strip off leading ':'
1528 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1529 return field
1530
1531 def parse_field_body(self, indented, offset, node) -> None:
1532 self.nested_parse(indented, input_offset=offset, node=node)
1533
1534 def option_marker(self, match, context, next_state):
1535 """Option list item."""
1536 optionlist = nodes.option_list()
1537 (optionlist.source, optionlist.line
1538 ) = self.state_machine.get_source_and_line()
1539 try:
1540 listitem, blank_finish = self.option_list_item(match)
1541 except MarkupError as error:
1542 # This shouldn't happen; pattern won't match.
1543 msg = self.reporter.error('Invalid option list marker: %s'
1544 % error)
1545 self.parent += msg
1546 (indented, indent, line_offset, blank_finish
1547 ) = self.state_machine.get_first_known_indented(match.end())
1548 elements = self.block_quote(indented, line_offset)
1549 self.parent += elements
1550 if not blank_finish:
1551 self.parent += self.unindent_warning('Option list')
1552 return [], next_state, []
1553 self.parent += optionlist
1554 optionlist += listitem
1555 offset = self.state_machine.line_offset + 1 # next line
1556 newline_offset, blank_finish = self.nested_list_parse(
1557 self.state_machine.input_lines[offset:],
1558 input_offset=self.state_machine.abs_line_offset() + 1,
1559 node=optionlist, initial_state='OptionList',
1560 blank_finish=blank_finish)
1561 self.goto_line(newline_offset)
1562 if not blank_finish:
1563 self.parent += self.unindent_warning('Option list')
1564 return [], next_state, []
1565
1566 def option_list_item(self, match):
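        """
        Parse one option list item; return (option_list_item node,
        "blank finish").
        """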
1567 offset = self.state_machine.abs_line_offset()
1568 options = self.parse_option_marker(match)
1569 (indented, indent, line_offset, blank_finish
1570 ) = self.state_machine.get_first_known_indented(match.end())
1571 if not indented: # not an option list item
1572 self.goto_line(offset)
1573 raise statemachine.TransitionCorrection('text')
1574 option_group = nodes.option_group('', *options)
1575 description = nodes.description('\n'.join(indented))
1576 option_list_item = nodes.option_list_item('', option_group,
1577 description)
1578 if indented:
1579 self.nested_parse(indented, input_offset=line_offset,
1580 node=description)
1581 return option_list_item, blank_finish
1582
1583 def parse_option_marker(self, match):
1584 """
        Return a list of `nodes.option` and `nodes.option_argument` objects,
1586 parsed from an option marker match.
1587
1588 :Exception: `MarkupError` for invalid option markers.
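
        For example, a marker such as ``-o FILE, --output=<FILE>`` would
        yield two `option` nodes: the first with an `option_string` "-o"
        and an `option_argument` "FILE" (delimiter " "), the second with
        "--output" and "<FILE>" (delimiter "=").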
1589 """
1590 optlist = []
1591 # split at ", ", except inside < > (complex arguments)
1592 optionstrings = re.split(r', (?![^<]*>)', match.group().rstrip())
1593 for optionstring in optionstrings:
1594 tokens = optionstring.split()
1595 delimiter = ' '
1596 firstopt = tokens[0].split('=', 1)
1597 if len(firstopt) > 1:
1598 # "--opt=value" form
1599 tokens[:1] = firstopt
1600 delimiter = '='
1601 elif (len(tokens[0]) > 2
1602 and ((tokens[0].startswith('-')
1603 and not tokens[0].startswith('--'))
1604 or tokens[0].startswith('+'))):
1605 # "-ovalue" form
1606 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1607 delimiter = ''
1608 if len(tokens) > 1 and (tokens[1].startswith('<')
1609 and tokens[-1].endswith('>')):
1610 # "-o <value1 value2>" form; join all values into one token
1611 tokens[1:] = [' '.join(tokens[1:])]
1612 if 0 < len(tokens) <= 2:
1613 option = nodes.option(optionstring)
1614 option += nodes.option_string(tokens[0], tokens[0])
1615 if len(tokens) > 1:
1616 option += nodes.option_argument(tokens[1], tokens[1],
1617 delimiter=delimiter)
1618 optlist.append(option)
1619 else:
1620 raise MarkupError(
1621 'wrong number of option tokens (=%s), should be 1 or 2: '
1622 '"%s"' % (len(tokens), optionstring))
1623 return optlist
1624
1625 def doctest(self, match, context, next_state):
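        """A doctest block, stored verbatim in a doctest_block node."""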
1626 line = self.document.current_line
1627 data = '\n'.join(self.state_machine.get_text_block())
1628 # TODO: Parse with `directives.body.CodeBlock` with
1629 # argument 'pycon' (Python Console) in Docutils 1.0.
1630 n = nodes.doctest_block(data, data)
1631 n.line = line
1632 self.parent += n
1633 return [], next_state, []
1634
1635 def line_block(self, match, context, next_state):
1636 """First line of a line block."""
1637 block = nodes.line_block()
1638 self.parent += block
1639 lineno = self.state_machine.abs_line_number()
1640 (block.source,
1641 block.line) = self.state_machine.get_source_and_line(lineno)
1642 line, messages, blank_finish = self.line_block_line(match, lineno)
1643 block += line
1644 self.parent += messages
1645 if not blank_finish:
1646 offset = self.state_machine.line_offset + 1 # next line
1647 new_line_offset, blank_finish = self.nested_list_parse(
1648 self.state_machine.input_lines[offset:],
1649 input_offset=self.state_machine.abs_line_offset() + 1,
1650 node=block, initial_state='LineBlock',
1651 blank_finish=False)
1652 self.goto_line(new_line_offset)
1653 if not blank_finish:
1654 self.parent += self.reporter.warning(
1655 'Line block ends without a blank line.',
1656 line=lineno+1)
1657 if len(block):
1658 if block[0].indent is None:
1659 block[0].indent = 0
1660 self.nest_line_block_lines(block)
1661 return [], next_state, []
1662
1663 def line_block_line(self, match, lineno):
1664 """Return one line element of a line_block."""
1665 (indented, indent, line_offset, blank_finish
1666 ) = self.state_machine.get_first_known_indented(match.end(),
1667 until_blank=True)
1668 text = '\n'.join(indented)
1669 text_nodes, messages = self.inline_text(text, lineno)
1670 line = nodes.line(text, '', *text_nodes)
1671 (line.source,
1672 line.line) = self.state_machine.get_source_and_line(lineno)
1673 if match.string.rstrip() != '|': # not empty
1674 line.indent = len(match.group(1)) - 1
1675 return line, messages, blank_finish
1676
1677 def nest_line_block_lines(self, block) -> None:
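        """Fill in missing indents, then nest the lines by indentation."""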
1678 for index in range(1, len(block)):
1679 if block[index].indent is None:
1680 block[index].indent = block[index - 1].indent
1681 self.nest_line_block_segment(block)
1682
1683 def nest_line_block_segment(self, block) -> None:
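        """
        Recursively group lines indented more than the least indented
        line into nested line_block nodes.
        """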
1684 indents = [item.indent for item in block]
1685 least = min(indents)
1686 new_items = []
1687 new_block = nodes.line_block()
1688 for item in block:
1689 if item.indent > least:
1690 new_block.append(item)
1691 else:
1692 if len(new_block):
1693 self.nest_line_block_segment(new_block)
1694 new_items.append(new_block)
1695 new_block = nodes.line_block()
1696 new_items.append(item)
1697 if len(new_block):
1698 self.nest_line_block_segment(new_block)
1699 new_items.append(new_block)
1700 block[:] = new_items
1701
1702 def grid_table_top(self, match, context, next_state):
1703 """Top border of a full table."""
1704 return self.table_top(match, context, next_state,
1705 self.isolate_grid_table,
1706 tableparser.GridTableParser)
1707
1708 def simple_table_top(self, match, context, next_state):
1709 """Top border of a simple table."""
1710 return self.table_top(match, context, next_state,
1711 self.isolate_simple_table,
1712 tableparser.SimpleTableParser)
1713
1714 def table_top(self, match, context, next_state,
1715 isolate_function, parser_class):
1716 """Top border of a generic table."""
1717 nodelist, blank_finish = self.table(isolate_function, parser_class)
1718 self.parent += nodelist
1719 if not blank_finish:
1720 msg = self.reporter.warning(
1721 'Blank line required after table.',
1722 line=self.state_machine.abs_line_number()+1)
1723 self.parent += msg
1724 return [], next_state, []
1725
1726 def table(self, isolate_function, parser_class):
1727 """Parse a table."""
1728 block, messages, blank_finish = isolate_function()
1729 if block:
1730 try:
1731 parser = parser_class()
1732 tabledata = parser.parse(block)
1733 tableline = (self.state_machine.abs_line_number() - len(block)
1734 + 1)
1735 table = self.build_table(tabledata, tableline)
1736 nodelist = [table] + messages
1737 except tableparser.TableMarkupError as err:
1738 nodelist = self.malformed_table(block, ' '.join(err.args),
1739 offset=err.offset) + messages
1740 else:
1741 nodelist = messages
1742 return nodelist, blank_finish
1743
1744 def isolate_grid_table(self):
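        """
        Extract a grid table's text block; return (block, messages,
        "blank finish").  Return an empty block if the table is malformed.
        """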
1745 messages = []
1746 blank_finish = True
1747 try:
1748 block = self.state_machine.get_text_block(flush_left=True)
1749 except statemachine.UnexpectedIndentationError as err:
1750 block, src, srcline = err.args
1751 messages.append(self.reporter.error('Unexpected indentation.',
1752 source=src, line=srcline))
1753 blank_finish = False
1754 block.disconnect()
1755 # for East Asian chars:
1756 block.pad_double_width(self.double_width_pad_char)
1757 width = len(block[0].strip())
1758 for i in range(len(block)):
1759 block[i] = block[i].strip()
1760 if block[i][0] not in '+|': # check left edge
1761 blank_finish = False
1762 self.state_machine.previous_line(len(block) - i)
1763 del block[i:]
1764 break
1765 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1766 # from second-last to third line of table:
1767 for i in range(len(block) - 2, 1, -1):
1768 if self.grid_table_top_pat.match(block[i]):
1769 self.state_machine.previous_line(len(block) - i + 1)
1770 del block[i+1:]
1771 blank_finish = False
1772 break
1773 else:
1774 detail = 'Bottom border missing or corrupt.'
1775 messages.extend(self.malformed_table(block, detail, i))
1776 return [], messages, blank_finish
1777 for i in range(len(block)): # check right edge
1778 if len(block[i]) != width or block[i][-1] not in '+|':
1779 detail = 'Right border not aligned or missing.'
1780 messages.extend(self.malformed_table(block, detail, i))
1781 return [], messages, blank_finish
1782 return block, messages, blank_finish
1783
1784 def isolate_simple_table(self):
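        """
        Extract a simple table's text block (top border through bottom
        border); return (block, messages, "blank finish").
        """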
1785 start = self.state_machine.line_offset
1786 lines = self.state_machine.input_lines
1787 limit = len(lines) - 1
1788 toplen = len(lines[start].strip())
1789 pattern_match = self.simple_table_border_pat.match
1790 found = 0
1791 found_at = None
1792 i = start + 1
1793 while i <= limit:
1794 line = lines[i]
1795 match = pattern_match(line)
1796 if match:
1797 if len(line.strip()) != toplen:
1798 self.state_machine.next_line(i - start)
1799 messages = self.malformed_table(
1800 lines[start:i+1], 'Bottom border or header rule does '
1801 'not match top border.', i-start)
1802 return [], messages, i == limit or not lines[i+1].strip()
1803 found += 1
1804 found_at = i
1805 if found == 2 or i == limit or not lines[i+1].strip():
1806 end = i
1807 break
1808 i += 1
1809 else: # reached end of input_lines
1810 details = 'No bottom table border found'
1811 if found:
1812 details += ' or no blank line after table bottom'
1813 self.state_machine.next_line(found_at - start)
1814 block = lines[start:found_at+1]
1815 else:
1816 self.state_machine.next_line(i - start - 1)
1817 block = lines[start:]
1818 messages = self.malformed_table(block, details + '.')
1819 return [], messages, not found
1820 self.state_machine.next_line(end - start)
1821 block = lines[start:end+1]
1822 # for East Asian chars:
1823 block.pad_double_width(self.double_width_pad_char)
1824 return block, [], end == limit or not lines[end+1].strip()
1825
1826 def malformed_table(self, block, detail='', offset=0):
1827 block.replace(self.double_width_pad_char, '')
1828 data = '\n'.join(block)
1829 message = 'Malformed table.'
1830 startline = self.state_machine.abs_line_number() - len(block) + 1
1831 if detail:
1832 message += '\n' + detail
1833 error = self.reporter.error(message, nodes.literal_block(data, data),
1834 line=startline+offset)
1835 return [error]
1836
1837 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
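        """
        Return a table node built from `tabledata`:
        (column widths, header rows, body rows).
        """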
1838 colwidths, headrows, bodyrows = tabledata
1839 table = nodes.table()
1840 if widths == 'auto':
1841 table['classes'] += ['colwidths-auto']
1842 elif widths: # "grid" or list of integers
1843 table['classes'] += ['colwidths-given']
1844 tgroup = nodes.tgroup(cols=len(colwidths))
1845 table += tgroup
1846 for colwidth in colwidths:
1847 colspec = nodes.colspec(colwidth=colwidth)
1848 if stub_columns:
1849 colspec.attributes['stub'] = True
1850 stub_columns -= 1
1851 tgroup += colspec
1852 if headrows:
1853 thead = nodes.thead()
1854 tgroup += thead
1855 for row in headrows:
1856 thead += self.build_table_row(row, tableline)
1857 tbody = nodes.tbody()
1858 tgroup += tbody
1859 for row in bodyrows:
1860 tbody += self.build_table_row(row, tableline)
1861 return table
1862
1863 def build_table_row(self, rowdata, tableline):
1864 row = nodes.row()
1865 for cell in rowdata:
1866 if cell is None:
1867 continue
1868 morerows, morecols, offset, cellblock = cell
1869 attributes = {}
1870 if morerows:
1871 attributes['morerows'] = morerows
1872 if morecols:
1873 attributes['morecols'] = morecols
1874 entry = nodes.entry(**attributes)
1875 row += entry
1876 if ''.join(cellblock):
1877 self.nested_parse(cellblock, input_offset=tableline+offset,
1878 node=entry)
1879 return row
1880
1881 explicit = Struct()
1882 """Patterns and constants used for explicit markup recognition."""
1883
1884 explicit.patterns = Struct(
1885 target=re.compile(r"""
1886 (
1887 _ # anonymous target
1888 | # *OR*
1889 (?!_) # no underscore at the beginning
1890 (?P<quote>`?) # optional open quote
1891 (?![ `]) # first char. not space or
1892 # backquote
1893 (?P<name> # reference name
1894 .+?
1895 )
1896 %(non_whitespace_escape_before)s
1897 (?P=quote) # close quote if open quote used
1898 )
1899 (?<!(?<!\x00):) # no unescaped colon at end
1900 %(non_whitespace_escape_before)s
1901 [ ]? # optional space
1902 : # end of reference name
1903 ([ ]+|$) # followed by whitespace
1904 """ % vars(Inliner), re.VERBOSE),
1905 reference=re.compile(r"""
1906 (
1907 (?P<simple>%(simplename)s)_
1908 | # *OR*
1909 ` # open backquote
1910 (?![ ]) # not space
1911 (?P<phrase>.+?) # hyperlink phrase
1912 %(non_whitespace_escape_before)s
1913 `_ # close backquote,
1914 # reference mark
1915 )
1916 $ # end of string
1917 """ % vars(Inliner), re.VERBOSE),
1918 substitution=re.compile(r"""
1919 (
1920 (?![ ]) # first char. not space
1921 (?P<name>.+?) # substitution text
1922 %(non_whitespace_escape_before)s
1923 \| # close delimiter
1924 )
1925 ([ ]+|$) # followed by whitespace
1926 """ % vars(Inliner),
1927 re.VERBOSE),)
1928
1929 def footnote(self, match):
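        """
        Parse a footnote (manually numbered, auto-numbered, labeled
        auto-numbered, or auto-symbol); return ([footnote node],
        "blank finish").
        """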
1930 src, srcline = self.state_machine.get_source_and_line()
1931 (indented, indent, offset, blank_finish
1932 ) = self.state_machine.get_first_known_indented(match.end())
1933 label = match.group(1)
1934 name = normalize_name(label)
1935 footnote = nodes.footnote('\n'.join(indented))
1936 footnote.source = src
1937 footnote.line = srcline
1938 if name[0] == '#': # auto-numbered
1939 name = name[1:] # autonumber label
1940 footnote['auto'] = 1
1941 if name:
1942 footnote['names'].append(name)
1943 self.document.note_autofootnote(footnote)
1944 elif name == '*': # auto-symbol
1945 name = ''
1946 footnote['auto'] = '*'
1947 self.document.note_symbol_footnote(footnote)
1948 else: # manually numbered
1949 footnote += nodes.label('', label)
1950 footnote['names'].append(name)
1951 self.document.note_footnote(footnote)
1952 if name:
1953 self.document.note_explicit_target(footnote, footnote)
1954 else:
1955 self.document.set_id(footnote, footnote)
1956 if indented:
1957 self.nested_parse(indented, input_offset=offset, node=footnote)
1958 else:
1959 footnote += self.reporter.warning('Footnote content expected.')
1960 return [footnote], blank_finish
1961
1962 def citation(self, match):
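        """Parse a citation; return ([citation node], "blank finish")."""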
1963 src, srcline = self.state_machine.get_source_and_line()
1964 (indented, indent, offset, blank_finish
1965 ) = self.state_machine.get_first_known_indented(match.end())
1966 label = match.group(1)
1967 name = normalize_name(label)
1968 citation = nodes.citation('\n'.join(indented))
1969 citation.source = src
1970 citation.line = srcline
1971 citation += nodes.label('', label)
1972 citation['names'].append(name)
1973 self.document.note_citation(citation)
1974 self.document.note_explicit_target(citation, citation)
1975 if indented:
1976 self.nested_parse(indented, input_offset=offset, node=citation)
1977 else:
1978 citation += self.reporter.warning('Citation content expected.')
1979 return [citation], blank_finish
1980
1981 def hyperlink_target(self, match):
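        """
        Parse an explicit hyperlink target; return ([target node],
        "blank finish").
        """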
1982 pattern = self.explicit.patterns.target
1983 lineno = self.state_machine.abs_line_number()
1984 (block, indent, offset, blank_finish
1985 ) = self.state_machine.get_first_known_indented(
1986 match.end(), until_blank=True, strip_indent=False)
1987 blocktext = match.string[:match.end()] + '\n'.join(block)
1988 block = [escape2null(line) for line in block]
1989 escaped = block[0]
1990 blockindex = 0
1991 while True:
1992 targetmatch = pattern.match(escaped)
1993 if targetmatch:
1994 break
1995 blockindex += 1
1996 try:
1997 escaped += block[blockindex]
1998 except IndexError:
1999 raise MarkupError('malformed hyperlink target.')
2000 del block[:blockindex]
2001 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
2002 target = self.make_target(block, blocktext, lineno,
2003 targetmatch.group('name'))
2004 return [target], blank_finish
2005
2006 def make_target(self, block, block_text, lineno, target_name):
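        """Build and return a target node from a parsed hyperlink target."""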
2007 target_type, data = self.parse_target(block, block_text, lineno)
2008 if target_type == 'refname':
2009 target = nodes.target(block_text, '', refname=normalize_name(data))
2010 target.indirect_reference_name = data
2011 self.add_target(target_name, '', target, lineno)
2012 self.document.note_indirect_target(target)
2013 return target
2014 elif target_type == 'refuri':
2015 target = nodes.target(block_text, '')
2016 self.add_target(target_name, data, target, lineno)
2017 return target
2018 else:
2019 return data
2020
2021 def parse_target(self, block, block_text, lineno):
2022 """
2023 Determine the type of reference of a target.
2024
2025 :Return: A 2-tuple, one of:
2026
2027 - 'refname' and the indirect reference name
2028 - 'refuri' and the URI
2030 """
2031 if block and block[-1].strip()[-1:] == '_': # possible indirect target
2032 reference = ' '.join(line.strip() for line in block)
2033 refname = self.is_reference(reference)
2034 if refname:
2035 return 'refname', refname
2036 ref_parts = split_escaped_whitespace(' '.join(block))
2037 reference = ' '.join(''.join(unescape(part).split())
2038 for part in ref_parts)
2039 return 'refuri', reference
2040
2041 def is_reference(self, reference):
2042 match = self.explicit.patterns.reference.match(
2043 whitespace_normalize_name(reference))
2044 if not match:
2045 return None
2046 return unescape(match.group('simple') or match.group('phrase'))
2047
2048 def add_target(self, targetname, refuri, target, lineno):
2049 target.line = lineno
2050 if targetname:
2051 name = normalize_name(unescape(targetname))
2052 target['names'].append(name)
2053 if refuri:
2054 uri = self.inliner.adjust_uri(refuri)
2055 if uri:
2056 target['refuri'] = uri
2057 else:
2058 raise ApplicationError('problem with URI: %r' % refuri)
2059 self.document.note_explicit_target(target, self.parent)
2060 else: # anonymous target
2061 if refuri:
2062 target['refuri'] = refuri
2063 target['anonymous'] = True
2064 self.document.note_anonymous_target(target)
2065
2066 def substitution_def(self, match):
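        """
        Parse a substitution definition; return ([substitution_definition
        node] or [system_message], "blank finish").
        """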
2067 pattern = self.explicit.patterns.substitution
2068 src, srcline = self.state_machine.get_source_and_line()
2069 (block, indent, offset, blank_finish
2070 ) = self.state_machine.get_first_known_indented(match.end(),
2071 strip_indent=False)
2072 blocktext = (match.string[:match.end()] + '\n'.join(block))
2073 block.disconnect()
2074 escaped = escape2null(block[0].rstrip())
2075 blockindex = 0
2076 while True:
2077 subdefmatch = pattern.match(escaped)
2078 if subdefmatch:
2079 break
2080 blockindex += 1
2081 try:
2082 escaped = escaped + ' ' + escape2null(
2083 block[blockindex].strip())
2084 except IndexError:
2085 raise MarkupError('malformed substitution definition.')
2086 del block[:blockindex] # strip out the substitution marker
2087 start = subdefmatch.end()-len(escaped)-1
2088 block[0] = (block[0].strip() + ' ')[start:-1]
2089 if not block[0]:
2090 del block[0]
2091 offset += 1
2092 while block and not block[-1].strip():
2093 block.pop()
2094 subname = subdefmatch.group('name')
2095 substitution_node = nodes.substitution_definition(blocktext)
2096 substitution_node.source = src
2097 substitution_node.line = srcline
2098 if not block:
2099 msg = self.reporter.warning(
2100 'Substitution definition "%s" missing contents.' % subname,
2101 nodes.literal_block(blocktext, blocktext),
2102 source=src, line=srcline)
2103 return [msg], blank_finish
2104 block[0] = block[0].strip()
2105 substitution_node['names'].append(
2106 nodes.whitespace_normalize_name(subname))
2107 new_abs_offset, blank_finish = self.nested_list_parse(
2108 block, input_offset=offset, node=substitution_node,
2109 initial_state='SubstitutionDef', blank_finish=blank_finish)
2110 i = 0
2111 for node in substitution_node[:]:
            if not isinstance(node, (nodes.Inline, nodes.Text)):
2114 self.parent += substitution_node[i]
2115 del substitution_node[i]
2116 else:
2117 i += 1
2118 for node in substitution_node.findall(nodes.Element):
2119 if self.disallowed_inside_substitution_definitions(node):
2120 pformat = nodes.literal_block('', node.pformat().rstrip())
2121 msg = self.reporter.error(
2122 'Substitution definition contains illegal element <%s>:'
2123 % node.tagname,
2124 pformat, nodes.literal_block(blocktext, blocktext),
2125 source=src, line=srcline)
2126 return [msg], blank_finish
2127 if len(substitution_node) == 0:
2128 msg = self.reporter.warning(
2129 'Substitution definition "%s" empty or invalid.' % subname,
2130 nodes.literal_block(blocktext, blocktext),
2131 source=src, line=srcline)
2132 return [msg], blank_finish
2133 self.document.note_substitution_def(
2134 substitution_node, subname, self.parent)
2135 return [substitution_node], blank_finish
2136
2137 def disallowed_inside_substitution_definitions(self, node) -> bool:
2138 if (node['ids']
2139 or isinstance(node, nodes.reference) and node.get('anonymous')
2140 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2141 return True
2142 else:
2143 return False
2144
2145 def directive(self, match, **option_presets):
2146 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2147 type_name = match.group(1)
2148 directive_class, messages = directives.directive(
2149 type_name, self.memo.language, self.document)
2150 self.parent += messages
2151 if directive_class:
2152 return self.run_directive(
2153 directive_class, match, type_name, option_presets)
2154 else:
2155 return self.unknown_directive(type_name)
2156
2157 def run_directive(self, directive, match, type_name, option_presets):
2158 """
2159 Parse a directive then run its directive function.
2160
2161 Parameters:
2162
2163 - `directive`: The class implementing the directive. Must be
2164 a subclass of `rst.Directive`.
2165
2166 - `match`: A regular expression match object which matched the first
2167 line of the directive.
2168
2169 - `type_name`: The directive name, as used in the source text.
2170
2171 - `option_presets`: A dictionary of preset options, defaults for the
2172 directive options. Currently, only an "alt" option is passed by
2173 substitution definitions (value: the substitution name), which may
2174 be used by an embedded image directive.
2175
2176 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2177 """
2178 if isinstance(directive, (FunctionType, MethodType)):
2179 from docutils.parsers.rst import convert_directive_function
2180 directive = convert_directive_function(directive)
2181 lineno = self.state_machine.abs_line_number()
2182 initial_line_offset = self.state_machine.line_offset
2183 (indented, indent, line_offset, blank_finish
2184 ) = self.state_machine.get_first_known_indented(match.end(),
2185 strip_top=0)
2186 block_text = '\n'.join(self.state_machine.input_lines[
2187 initial_line_offset : self.state_machine.line_offset + 1]) # noqa: E203,E501
2188 try:
2189 arguments, options, content, content_offset = (
2190 self.parse_directive_block(indented, line_offset,
2191 directive, option_presets))
2192 except MarkupError as detail:
2193 error = self.reporter.error(
2194 'Error in "%s" directive:\n%s.' % (type_name,
2195 ' '.join(detail.args)),
2196 nodes.literal_block(block_text, block_text), line=lineno)
2197 return [error], blank_finish
2198 directive_instance = directive(
2199 type_name, arguments, options, content, lineno,
2200 content_offset, block_text, self, self.state_machine)
2201 try:
2202 result = directive_instance.run()
2203 except docutils.parsers.rst.DirectiveError as error:
2204 msg_node = self.reporter.system_message(error.level, error.msg,
2205 line=lineno)
2206 msg_node += nodes.literal_block(block_text, block_text)
2207 result = [msg_node]
2208 assert isinstance(result, list), \
2209 'Directive "%s" must return a list of nodes.' % type_name
2210 for i in range(len(result)):
2211 assert isinstance(result[i], nodes.Node), \
2212 ('Directive "%s" returned non-Node object (index %s): %r'
2213 % (type_name, i, result[i]))
2214 return (result,
2215 blank_finish or self.state_machine.is_next_line_blank())
2216
2217 def parse_directive_block(self, indented, line_offset, directive,
2218 option_presets):
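        """
        Parse the directive's indented block into
        (arguments, options, content, content offset).
        """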
2219 option_spec = directive.option_spec
2220 has_content = directive.has_content
2221 if indented and not indented[0].strip():
2222 indented.trim_start()
2223 line_offset += 1
2224 while indented and not indented[-1].strip():
2225 indented.trim_end()
2226 if indented and (directive.required_arguments
2227 or directive.optional_arguments
2228 or option_spec):
2229 for i, line in enumerate(indented):
2230 if not line.strip():
2231 break
2232 else:
2233 i += 1
2234 arg_block = indented[:i]
2235 content = indented[i+1:]
2236 content_offset = line_offset + i + 1
2237 else:
2238 content = indented
2239 content_offset = line_offset
2240 arg_block = []
2241 if option_spec:
2242 options, arg_block = self.parse_directive_options(
2243 option_presets, option_spec, arg_block)
2244 else:
2245 options = {}
2246 if arg_block and not (directive.required_arguments
2247 or directive.optional_arguments):
2248 content = arg_block + indented[i:]
2249 content_offset = line_offset
2250 arg_block = []
2251 while content and not content[0].strip():
2252 content.trim_start()
2253 content_offset += 1
2254 if directive.required_arguments or directive.optional_arguments:
2255 arguments = self.parse_directive_arguments(
2256 directive, arg_block)
2257 else:
2258 arguments = []
2259 if content and not has_content:
2260 raise MarkupError('no content permitted')
2261 return arguments, options, content, content_offset
2262
2263 def parse_directive_options(self, option_presets, option_spec, arg_block):
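        """
        Split the option field lines off `arg_block` and parse them;
        return (options dict, remaining argument block).
        """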
2264 options = option_presets.copy()
2265 for i, line in enumerate(arg_block):
2266 if re.match(Body.patterns['field_marker'], line):
2267 opt_block = arg_block[i:]
2268 arg_block = arg_block[:i]
2269 break
2270 else:
2271 opt_block = []
2272 if opt_block:
2273 success, data = self.parse_extension_options(option_spec,
2274 opt_block)
2275 if success: # data is a dict of options
2276 options.update(data)
2277 else: # data is an error string
2278 raise MarkupError(data)
2279 return options, arg_block
2280
2281 def parse_directive_arguments(self, directive, arg_block):
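        """
        Return the directive's arguments parsed from `arg_block`;
        raise `MarkupError` if too few or too many are supplied.
        """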
2282 required = directive.required_arguments
2283 optional = directive.optional_arguments
2284 arg_text = '\n'.join(arg_block)
2285 arguments = arg_text.split()
2286 if len(arguments) < required:
2287 raise MarkupError('%s argument(s) required, %s supplied'
2288 % (required, len(arguments)))
2289 elif len(arguments) > required + optional:
2290 if directive.final_argument_whitespace:
2291 arguments = arg_text.split(None, required + optional - 1)
2292 else:
2293 raise MarkupError(
2294 'maximum %s argument(s) allowed, %s supplied'
2295 % (required + optional, len(arguments)))
2296 return arguments
2297
2298 def parse_extension_options(self, option_spec, datalines):
2299 """
2300 Parse `datalines` for a field list containing extension options
2301 matching `option_spec`.
2302
2303 :Parameters:
2304 - `option_spec`: a mapping of option name to conversion
2305 function, which should raise an exception on bad input.
2306 - `datalines`: a list of input strings.
2307
2308 :Return:
2309 - Success value, 1 or 0.
2310 - An option dictionary on success, an error string on failure.
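
        For example, ``option_spec={'alt': directives.unchanged}`` and
        ``datalines=[':alt: a caption']`` should yield the option
        dictionary ``{'alt': 'a caption'}`` on success.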
2311 """
2312 node = nodes.field_list()
2313 newline_offset, blank_finish = self.nested_list_parse(
2314 datalines, 0, node, initial_state='ExtensionOptions',
2315 blank_finish=True)
2316 if newline_offset != len(datalines): # incomplete parse of block
2317 return 0, 'invalid option block'
2318 try:
2319 options = utils.extract_extension_options(node, option_spec)
2320 except KeyError as detail:
2321 return 0, 'unknown option: "%s"' % detail.args[0]
2322 except (ValueError, TypeError) as detail:
2323 return 0, 'invalid option value: %s' % ' '.join(detail.args)
2324 except utils.ExtensionOptionError as detail:
2325 return 0, 'invalid option data: %s' % ' '.join(detail.args)
2326 if blank_finish:
2327 return 1, options
2328 else:
2329 return 0, 'option data incompletely parsed'
2330
2331 def unknown_directive(self, type_name):
2332 lineno = self.state_machine.abs_line_number()
2333 (indented, indent, offset, blank_finish
2334 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2335 text = '\n'.join(indented)
2336 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2337 nodes.literal_block(text, text),
2338 line=lineno)
2339 return [error], blank_finish
2340
2341 def comment(self, match):
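        """
        Parse an explicit-markup comment; return (node list, "blank finish").
        """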
2342 if self.state_machine.is_next_line_blank():
2343 first_comment_line = match.string[match.end():]
2344 if not first_comment_line.strip(): # empty comment
2345 return [nodes.comment()], True # "A tiny but practical wart."
2346 if first_comment_line.startswith('end of inclusion from "'):
2347 # cf. parsers.rst.directives.misc.Include
2348 self.document.include_log.pop()
2349 return [], True
2350 (indented, indent, offset, blank_finish
2351 ) = self.state_machine.get_first_known_indented(match.end())
2352 while indented and not indented[-1].strip():
2353 indented.trim_end()
2354 text = '\n'.join(indented)
2355 return [nodes.comment(text, text)], blank_finish
2356
2357 explicit.constructs = [
2358 (footnote,
2359 re.compile(r"""
2360 \.\.[ ]+ # explicit markup start
2361 \[
2362 ( # footnote label:
2363 [0-9]+ # manually numbered footnote
2364 | # *OR*
2365 \# # anonymous auto-numbered footnote
2366 | # *OR*
                          \#%s            # auto-numbered footnote with label
2368 | # *OR*
2369 \* # auto-symbol footnote
2370 )
2371 \]
2372 ([ ]+|$) # whitespace or end of line
2373 """ % Inliner.simplename, re.VERBOSE)),
2374 (citation,
2375 re.compile(r"""
2376 \.\.[ ]+ # explicit markup start
2377 \[(%s)\] # citation label
2378 ([ ]+|$) # whitespace or end of line
2379 """ % Inliner.simplename, re.VERBOSE)),
2380 (hyperlink_target,
2381 re.compile(r"""
2382 \.\.[ ]+ # explicit markup start
2383 _ # target indicator
2384 (?![ ]|$) # first char. not space or EOL
2385 """, re.VERBOSE)),
2386 (substitution_def,
2387 re.compile(r"""
2388 \.\.[ ]+ # explicit markup start
2389 \| # substitution indicator
2390 (?![ ]|$) # first char. not space or EOL
2391 """, re.VERBOSE)),
2392 (directive,
2393 re.compile(r"""
2394 \.\.[ ]+ # explicit markup start
2395 (%s) # directive name
2396 [ ]? # optional space
2397 :: # directive delimiter
2398 ([ ]+|$) # whitespace or end of line
2399 """ % Inliner.simplename, re.VERBOSE))]
2400
2401 def explicit_markup(self, match, context, next_state):
2402 """Footnotes, hyperlink targets, directives, comments."""
2403 nodelist, blank_finish = self.explicit_construct(match)
2404 self.parent += nodelist
2405 self.explicit_list(blank_finish)
2406 return [], next_state, []
2407
2408 def explicit_construct(self, match):
2409 """Determine which explicit construct this is, parse & return it."""
2410 errors = []
2411 for method, pattern in self.explicit.constructs:
2412 expmatch = pattern.match(match.string)
2413 if expmatch:
2414 try:
2415 return method(self, expmatch)
2416 except MarkupError as error:
2417 lineno = self.state_machine.abs_line_number()
2418 message = ' '.join(error.args)
2419 errors.append(self.reporter.warning(message, line=lineno))
2420 break
2421 nodelist, blank_finish = self.comment(match)
2422 return nodelist + errors, blank_finish
2423
2424 def explicit_list(self, blank_finish) -> None:
2425 """
2426 Create a nested state machine for a series of explicit markup
2427 constructs (including anonymous hyperlink targets).
2428 """
2429 offset = self.state_machine.line_offset + 1 # next line
2430 newline_offset, blank_finish = self.nested_list_parse(
2431 self.state_machine.input_lines[offset:],
2432 input_offset=self.state_machine.abs_line_offset() + 1,
2433 node=self.parent, initial_state='Explicit',
2434 blank_finish=blank_finish,
2435 match_titles=self.state_machine.match_titles)
2436 self.goto_line(newline_offset)
2437 if not blank_finish:
2438 self.parent += self.unindent_warning('Explicit markup')
2439
2440 def anonymous(self, match, context, next_state):
2441 """Anonymous hyperlink targets."""
2442 nodelist, blank_finish = self.anonymous_target(match)
2443 self.parent += nodelist
2444 self.explicit_list(blank_finish)
2445 return [], next_state, []
2446
2447 def anonymous_target(self, match):
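        """
        Parse an anonymous hyperlink target; return ([target node],
        "blank finish").
        """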
2448 lineno = self.state_machine.abs_line_number()
2449 (block, indent, offset, blank_finish
2450 ) = self.state_machine.get_first_known_indented(match.end(),
2451 until_blank=True)
2452 blocktext = match.string[:match.end()] + '\n'.join(block)
2453 block = [escape2null(line) for line in block]
2454 target = self.make_target(block, blocktext, lineno, '')
2455 return [target], blank_finish
2456
2457 def line(self, match, context, next_state):
2458 """Section title overline or transition marker."""
2459 if self.state_machine.match_titles:
2460 return [match.string], 'Line', []
2461 elif match.string.strip() == '::':
2462 raise statemachine.TransitionCorrection('text')
2463 elif len(match.string.strip()) < 4:
2464 msg = self.reporter.info(
2465 'Unexpected possible title overline or transition.\n'
2466 "Treating it as ordinary text because it's so short.",
2467 line=self.state_machine.abs_line_number())
2468 self.parent += msg
2469 raise statemachine.TransitionCorrection('text')
2470 else:
2471 blocktext = self.state_machine.line
2472 msg = self.reporter.error(
2473 'Unexpected section title or transition.',
2474 nodes.literal_block(blocktext, blocktext),
2475 line=self.state_machine.abs_line_number())
2476 self.parent += msg
2477 return [], next_state, []
2478
2479 def text(self, match, context, next_state):
2480 """Titles, definition lists, paragraphs."""
2481 return [match.string], 'Text', []
2482
2483
2484class RFC2822Body(Body):
2485
2486 """
2487 RFC2822 headers are only valid as the first constructs in documents. As
2488 soon as anything else appears, the `Body` state should take over.
2489 """
2490
2491 patterns = Body.patterns.copy() # can't modify the original
2492 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2493 initial_transitions = [(name, 'Body')
2494 for name in Body.initial_transitions]
2495 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2496
2497 def rfc2822(self, match, context, next_state):
2498 """RFC2822-style field list item."""
2499 fieldlist = nodes.field_list(classes=['rfc2822'])
2500 self.parent += fieldlist
2501 field, blank_finish = self.rfc2822_field(match)
2502 fieldlist += field
2503 offset = self.state_machine.line_offset + 1 # next line
2504 newline_offset, blank_finish = self.nested_list_parse(
2505 self.state_machine.input_lines[offset:],
2506 input_offset=self.state_machine.abs_line_offset() + 1,
2507 node=fieldlist, initial_state='RFC2822List',
2508 blank_finish=blank_finish)
2509 self.goto_line(newline_offset)
2510 if not blank_finish:
2511 self.parent += self.unindent_warning(
2512 'RFC2822-style field list')
2513 return [], next_state, []
2514
2515 def rfc2822_field(self, match):
2516 name = match.string[:match.string.find(':')]
2517 (indented, indent, line_offset, blank_finish
2518 ) = self.state_machine.get_first_known_indented(match.end(),
2519 until_blank=True)
2520 fieldnode = nodes.field()
2521 fieldnode += nodes.field_name(name, name)
2522 fieldbody = nodes.field_body('\n'.join(indented))
2523 fieldnode += fieldbody
2524 if indented:
2525 self.nested_parse(indented, input_offset=line_offset,
2526 node=fieldbody)
2527 return fieldnode, blank_finish
2528
2529
2530class SpecializedBody(Body):
2531
2532 """
2533 Superclass for second and subsequent compound element members. Compound
2534 elements are lists and list-like constructs.
2535
2536 All transition methods are disabled (redefined as `invalid_input`).
2537 Override individual methods in subclasses to re-enable.
2538
2539 For example, once an initial bullet list item, say, is recognized, the
2540 `BulletList` subclass takes over, with a "bullet_list" node as its
2541 container. Upon encountering the initial bullet list item, `Body.bullet`
2542 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2543 starts up a nested parsing session with `BulletList` as the initial state.
2544 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2545 as only bullet list items are encountered, they are parsed and inserted
2546 into the container. The first construct which is *not* a bullet list item
2547 triggers the `invalid_input` method, which ends the nested parse and
2548 closes the container. `BulletList` needs to recognize input that is
2549 invalid in the context of a bullet list, which means everything *other
2550 than* bullet list items, so it inherits the transition list created in
2551 `Body`.
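
    For example, given the input::

        - first item
        - second item

        A paragraph.

    `Body.bullet` parses the first item and starts the nested parse;
    `BulletList.bullet` parses the second item; the paragraph is not a
    bullet list item, so it triggers `invalid_input` and closes the list.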
2552 """
2553
2554 def invalid_input(self, match=None, context=None, next_state=None):
2555 """Not a compound element member. Abort this state machine."""
2556 self.state_machine.previous_line() # back up so parent SM can reassess
2557 raise EOFError
2558
2559 indent = invalid_input
2560 bullet = invalid_input
2561 enumerator = invalid_input
2562 field_marker = invalid_input
2563 option_marker = invalid_input
2564 doctest = invalid_input
2565 line_block = invalid_input
2566 grid_table_top = invalid_input
2567 simple_table_top = invalid_input
2568 explicit_markup = invalid_input
2569 anonymous = invalid_input
2570 line = invalid_input
2571 text = invalid_input
2572
2573
2574class BulletList(SpecializedBody):
2575
2576 """Second and subsequent bullet_list list_items."""
2577
2578 def bullet(self, match, context, next_state):
2579 """Bullet list item."""
2580 if match.string[0] != self.parent['bullet']:
2581 # different bullet: new list
2582 self.invalid_input()
2583 listitem, blank_finish = self.list_item(match.end())
2584 self.parent += listitem
2585 self.blank_finish = blank_finish
2586 return [], next_state, []
2587
2588
2589class DefinitionList(SpecializedBody):
2590
2591 """Second and subsequent definition_list_items."""
2592
2593 def text(self, match, context, next_state):
2594 """Definition lists."""
2595 return [match.string], 'Definition', []
2596
2597
2598class EnumeratedList(SpecializedBody):
2599
2600 """Second and subsequent enumerated_list list_items."""
2601
2602 def enumerator(self, match, context, next_state):
2603 """Enumerated list item."""
2604 format, sequence, text, ordinal = self.parse_enumerator(
2605 match, self.parent['enumtype'])
2606 if (format != self.format
2607 or (sequence != '#' and (sequence != self.parent['enumtype']
2608 or self.auto
2609 or ordinal != (self.lastordinal + 1)))
2610 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2611 # different enumeration: new list
2612 self.invalid_input()
2613 if sequence == '#':
2614 self.auto = 1
2615 listitem, blank_finish = self.list_item(match.end())
2616 self.parent += listitem
2617 self.blank_finish = blank_finish
2618 self.lastordinal = ordinal
2619 return [], next_state, []
2620
2621
2622class FieldList(SpecializedBody):
2623
2624 """Second and subsequent field_list fields."""
2625
2626 def field_marker(self, match, context, next_state):
2627 """Field list field."""
2628 field, blank_finish = self.field(match)
2629 self.parent += field
2630 self.blank_finish = blank_finish
2631 return [], next_state, []
2632
2633
2634class OptionList(SpecializedBody):
2635
2636 """Second and subsequent option_list option_list_items."""
2637
2638 def option_marker(self, match, context, next_state):
2639 """Option list item."""
2640 try:
2641 option_list_item, blank_finish = self.option_list_item(match)
2642 except MarkupError:
2643 self.invalid_input()
2644 self.parent += option_list_item
2645 self.blank_finish = blank_finish
2646 return [], next_state, []
2647
2648
2649class RFC2822List(SpecializedBody, RFC2822Body):
2650
2651 """Second and subsequent RFC2822-style field_list fields."""
2652
2653 patterns = RFC2822Body.patterns
2654 initial_transitions = RFC2822Body.initial_transitions
2655
2656 def rfc2822(self, match, context, next_state):
2657 """RFC2822-style field list item."""
2658 field, blank_finish = self.rfc2822_field(match)
2659 self.parent += field
2660 self.blank_finish = blank_finish
2661 return [], 'RFC2822List', []
2662
2663 blank = SpecializedBody.invalid_input
2664
2665
2666class ExtensionOptions(FieldList):
2667
2668 """
2669 Parse field_list fields for extension options.
2670
2671 No nested parsing is done (including inline markup parsing).
2672 """
2673
2674 def parse_field_body(self, indented, offset, node) -> None:
2675 """Override `Body.parse_field_body` for simpler parsing."""
2676 lines = []
2677 for line in list(indented) + ['']:
2678 if line.strip():
2679 lines.append(line)
2680 elif lines:
2681 text = '\n'.join(lines)
2682 node += nodes.paragraph(text, text)
2683 lines = []
2684
2685
2686class LineBlock(SpecializedBody):
2687
2688 """Second and subsequent lines of a line_block."""
2689
2690 blank = SpecializedBody.invalid_input
2691
2692 def line_block(self, match, context, next_state):
2693 """New line of line block."""
2694 lineno = self.state_machine.abs_line_number()
2695 line, messages, blank_finish = self.line_block_line(match, lineno)
2696 self.parent += line
2697 self.parent.parent += messages
2698 self.blank_finish = blank_finish
2699 return [], next_state, []
2700
2701
2702class Explicit(SpecializedBody):
2703
2704 """Second and subsequent explicit markup construct."""
2705
2706 def explicit_markup(self, match, context, next_state):
2707 """Footnotes, hyperlink targets, directives, comments."""
2708 nodelist, blank_finish = self.explicit_construct(match)
2709 self.parent += nodelist
2710 self.blank_finish = blank_finish
2711 return [], next_state, []
2712
2713 def anonymous(self, match, context, next_state):
2714 """Anonymous hyperlink targets."""
2715 nodelist, blank_finish = self.anonymous_target(match)
2716 self.parent += nodelist
2717 self.blank_finish = blank_finish
2718 return [], next_state, []
2719
2720 blank = SpecializedBody.invalid_input
2721
2722
2723class SubstitutionDef(Body):
2724
2725 """
2726 Parser for the contents of a substitution_definition element.
2727 """
2728
2729 patterns = {
2730 'embedded_directive': re.compile(r'(%s)::( +|$)'
2731 % Inliner.simplename),
2732 'text': r''}
2733 initial_transitions = ['embedded_directive', 'text']
2734
2735 def embedded_directive(self, match, context, next_state):
2736 nodelist, blank_finish = self.directive(match,
2737 alt=self.parent['names'][0])
2738 self.parent += nodelist
2739 if not self.state_machine.at_eof():
2740 self.blank_finish = blank_finish
2741 raise EOFError
2742
2743 def text(self, match, context, next_state):
2744 if not self.state_machine.at_eof():
2745 self.blank_finish = self.state_machine.is_next_line_blank()
2746 raise EOFError
2747
2748
2749class Text(RSTState):
2750
2751 """
2752 Classifier of second line of a text block.
2753
2754 Could be a paragraph, a definition list item, or a title.
2755 """
2756
2757 patterns = {'underline': Body.patterns['line'],
2758 'text': r''}
2759 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2760
2761 def blank(self, match, context, next_state):
2762 """End of paragraph."""
2763 # NOTE: self.paragraph returns [node, system_message(s)], literalnext
2764 paragraph, literalnext = self.paragraph(
2765 context, self.state_machine.abs_line_number() - 1)
2766 self.parent += paragraph
2767 if literalnext:
2768 self.parent += self.literal_block()
2769 return [], 'Body', []
2770
2771 def eof(self, context):
2772 if context:
2773 self.blank(None, context, None)
2774 return []
2775
2776 def indent(self, match, context, next_state):
2777 """Definition list item."""
2778 dl = nodes.definition_list()
2779 # the definition list starts on the line before the indent:
2780 lineno = self.state_machine.abs_line_number() - 1
2781 dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
2782 dl_item, blank_finish = self.definition_list_item(context)
2783 dl += dl_item
2784 self.parent += dl
2785 offset = self.state_machine.line_offset + 1 # next line
2786 newline_offset, blank_finish = self.nested_list_parse(
2787 self.state_machine.input_lines[offset:],
2788 input_offset=self.state_machine.abs_line_offset() + 1,
2789 node=dl, initial_state='DefinitionList',
2790 blank_finish=blank_finish, blank_finish_state='Definition')
2791 self.goto_line(newline_offset)
2792 if not blank_finish:
2793 self.parent += self.unindent_warning('Definition list')
2794 return [], 'Body', []
2795
2796 def underline(self, match, context, next_state):
2797 """Section title."""
2798 lineno = self.state_machine.abs_line_number()
2799 title = context[0].rstrip()
2800 underline = match.string.rstrip()
2801 source = title + '\n' + underline
2802 messages = []
2803 if column_width(title) > len(underline):
2804 if len(underline) < 4:
2805 if self.state_machine.match_titles:
2806 msg = self.reporter.info(
2807 'Possible title underline, too short for the title.\n'
2808 "Treating it as ordinary text because it's so short.",
2809 line=lineno)
2810 self.parent += msg
2811 raise statemachine.TransitionCorrection('text')
2812 else:
2813 blocktext = context[0] + '\n' + self.state_machine.line
2814 msg = self.reporter.warning(
2815 'Title underline too short.',
2816 nodes.literal_block(blocktext, blocktext),
2817 line=lineno)
2818 messages.append(msg)
2819 if not self.state_machine.match_titles:
2820 blocktext = context[0] + '\n' + self.state_machine.line
2821 # We need get_source_and_line() here to report correctly
2822 src, srcline = self.state_machine.get_source_and_line()
2823 # TODO: why is abs_line_number() == srcline+1
2824 # if the error is in a table (try with test_tables.py)?
2825 # print("get_source_and_line", srcline)
2826 # print("abs_line_number", self.state_machine.abs_line_number())
2827 msg = self.reporter.error(
2828 'Unexpected section title.',
2829 nodes.literal_block(blocktext, blocktext),
2830 source=src, line=srcline)
2831 self.parent += messages
2832 self.parent += msg
2833 return [], next_state, []
2834 style = underline[0]
2835 context[:] = []
2836 self.section(title, source, style, lineno - 1, messages)
2837 return [], next_state, []
2838
2839 def text(self, match, context, next_state):
2840 """Paragraph."""
2841 startline = self.state_machine.abs_line_number() - 1
2842 msg = None
2843 try:
2844 block = self.state_machine.get_text_block(flush_left=True)
2845 except statemachine.UnexpectedIndentationError as err:
2846 block, src, srcline = err.args
2847 msg = self.reporter.error('Unexpected indentation.',
2848 source=src, line=srcline)
2849 lines = context + list(block)
2850 paragraph, literalnext = self.paragraph(lines, startline)
2851 self.parent += paragraph
2852 self.parent += msg
2853 if literalnext:
2854 try:
2855 self.state_machine.next_line()
2856 except EOFError:
2857 pass
2858 self.parent += self.literal_block()
2859 return [], next_state, []
2860
2861 def literal_block(self):
        """
        Return a list of nodes: a literal_block (possibly followed by an
        "unindent" warning) or the parsed contents of a quoted literal
        block.
        """
2863 (indented, indent, offset, blank_finish
2864 ) = self.state_machine.get_indented()
2865 while indented and not indented[-1].strip():
2866 indented.trim_end()
2867 if not indented:
2868 return self.quoted_literal_block()
2869 data = '\n'.join(indented)
2870 literal_block = nodes.literal_block(data, data)
2871 (literal_block.source,
2872 literal_block.line) = self.state_machine.get_source_and_line(offset+1)
2873 nodelist = [literal_block]
2874 if not blank_finish:
2875 nodelist.append(self.unindent_warning('Literal block'))
2876 return nodelist
2877
2878 def quoted_literal_block(self):
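        """
        Parse a quoted (unindented) literal block with a nested
        `QuotedLiteralBlock` state machine; return its child nodes.
        """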
2879 abs_line_offset = self.state_machine.abs_line_offset()
2880 offset = self.state_machine.line_offset
2881 parent_node = nodes.Element()
2882 new_abs_offset = self.nested_parse(
2883 self.state_machine.input_lines[offset:],
2884 input_offset=abs_line_offset, node=parent_node, match_titles=False,
2885 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2886 'initial_state': 'QuotedLiteralBlock'})
2887 self.goto_line(new_abs_offset)
2888 return parent_node.children
2889
2890 def definition_list_item(self, termline):
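        """
        Parse one definition_list_item from `termline` (the term) and the
        following indented block (the definition); return (node, "blank
        finish").
        """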
2891 # the parser is already on the second (indented) line:
2892 dd_lineno = self.state_machine.abs_line_number()
2893 dt_lineno = dd_lineno - 1
2894 (indented, indent, line_offset, blank_finish
2895 ) = self.state_machine.get_indented()
2896 dl_item = nodes.definition_list_item(
2897 '\n'.join(termline + list(indented)))
2898 (dl_item.source,
2899 dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
2900 dt_nodes, messages = self.term(termline, dt_lineno)
2901 dl_item += dt_nodes
2902 dd = nodes.definition('', *messages)
2903 dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
2904 dl_item += dd
2905 if termline[0][-2:] == '::':
2906 dd += self.reporter.info(
2907 'Blank line missing before literal block (after the "::")? '
2908 'Interpreted as a definition list item.',
2909 line=dd_lineno)
2910 # TODO: drop a definition if it is an empty comment to allow
2911 # definition list items with several terms?
2912 # https://sourceforge.net/p/docutils/feature-requests/60/
2913 self.nested_parse(indented, input_offset=line_offset, node=dd)
2914 return dl_item, blank_finish
2915
2916 classifier_delimiter = re.compile(' +: +')
2917
2918 def term(self, lines, lineno):
2919 """Return a definition_list's term and optional classifiers."""
2920 assert len(lines) == 1
2921 text_nodes, messages = self.inline_text(lines[0], lineno)
2922 dt = nodes.term(lines[0])
2923 dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
2924 node_list = [dt]
2925 for i in range(len(text_nodes)):
2926 node = text_nodes[i]
2927 if isinstance(node, nodes.Text):
2928 parts = self.classifier_delimiter.split(node)
2929 if len(parts) == 1:
2930 node_list[-1] += node
2931 else:
2932 text = parts[0].rstrip()
2933 textnode = nodes.Text(text)
2934 node_list[-1] += textnode
2935 node_list += [nodes.classifier(unescape(part, True), part)
2936 for part in parts[1:]]
2937 else:
2938 node_list[-1] += node
2939 return node_list, messages
2940
2941
2942class SpecializedText(Text):
2943
2944 """
2945 Superclass for second and subsequent lines of Text-variants.
2946
2947 All transition methods are disabled. Override individual methods in
2948 subclasses to re-enable.
2949 """
2950
2951 def eof(self, context):
2952 """Incomplete construct."""
2953 return []
2954
2955 def invalid_input(self, match=None, context=None, next_state=None):
2956 """Not a compound element member. Abort this state machine."""
2957 raise EOFError
2958
2959 blank = invalid_input
2960 indent = invalid_input
2961 underline = invalid_input
2962 text = invalid_input
2963
2964
2965class Definition(SpecializedText):
2966
2967 """Second line of potential definition_list_item."""
2968
2969 def eof(self, context):
2970 """Not a definition."""
2971 self.state_machine.previous_line(2) # so parent SM can reassess
2972 return []
2973
2974 def indent(self, match, context, next_state):
2975 """Definition list item."""
2976 dl_item, blank_finish = self.definition_list_item(context)
2977 self.parent += dl_item
2978 self.blank_finish = blank_finish
2979 return [], 'DefinitionList', []
2980
2981
2982class Line(SpecializedText):
2983
2984 """
2985 Second line of over- & underlined section title or transition marker.
2986 """
2987
2988 eofcheck = 1 # ignored, will be removed in Docutils 2.0.
2989
2990 def eof(self, context):
2991 """Transition marker at end of section or document."""
2992 marker = context[0].strip()
2993 if len(marker) < 4:
2994 self.state_correction(context)
2995 src, srcline = self.state_machine.get_source_and_line()
2996 # lineno = self.state_machine.abs_line_number() - 1
2997 transition = nodes.transition(rawsource=context[0])
2998 transition.source = src
2999 transition.line = srcline - 1
3000 # transition.line = lineno
3001 self.parent += transition
3002 return []
3003
3004 def blank(self, match, context, next_state):
3005 """Transition marker."""
3006 src, srcline = self.state_machine.get_source_and_line()
3007 marker = context[0].strip()
3008 if len(marker) < 4:
3009 self.state_correction(context)
3010 transition = nodes.transition(rawsource=marker)
3011 transition.source = src
3012 transition.line = srcline - 1
3013 self.parent += transition
3014 return [], 'Body', []
3015
3016 def text(self, match, context, next_state):
3017 """Potential over- & underlined title."""
3018 lineno = self.state_machine.abs_line_number() - 1
3019 overline = context[0]
3020 title = match.string
3021 underline = ''
3022 try:
3023 underline = self.state_machine.next_line()
3024 except EOFError:
3025 blocktext = overline + '\n' + title
3026 if len(overline.rstrip()) < 4:
3027 self.short_overline(context, blocktext, lineno, 2)
3028 else:
3029 msg = self.reporter.error(
3030 'Incomplete section title.',
3031 nodes.literal_block(blocktext, blocktext),
3032 line=lineno)
3033 self.parent += msg
3034 return [], 'Body', []
3035 source = '%s\n%s\n%s' % (overline, title, underline)
3036 overline = overline.rstrip()
3037 underline = underline.rstrip()
3038 if not self.transitions['underline'][0].match(underline):
3039 blocktext = overline + '\n' + title + '\n' + underline
3040 if len(overline.rstrip()) < 4:
3041 self.short_overline(context, blocktext, lineno, 2)
3042 else:
3043 msg = self.reporter.error(
3044 'Missing matching underline for section title overline.',
3045 nodes.literal_block(source, source),
3046 line=lineno)
3047 self.parent += msg
3048 return [], 'Body', []
3049 elif overline != underline:
3050 blocktext = overline + '\n' + title + '\n' + underline
3051 if len(overline.rstrip()) < 4:
3052 self.short_overline(context, blocktext, lineno, 2)
3053 else:
3054 msg = self.reporter.error(
3055 'Title overline & underline mismatch.',
3056 nodes.literal_block(source, source),
3057 line=lineno)
3058 self.parent += msg
3059 return [], 'Body', []
3060 title = title.rstrip()
3061 messages = []
3062 if column_width(title) > len(overline):
3063 blocktext = overline + '\n' + title + '\n' + underline
3064 if len(overline.rstrip()) < 4:
3065 self.short_overline(context, blocktext, lineno, 2)
3066 else:
3067 msg = self.reporter.warning(
3068 'Title overline too short.',
3069 nodes.literal_block(source, source),
3070 line=lineno)
3071 messages.append(msg)
3072 style = (overline[0], underline[0])
3073 self.section(title.lstrip(), source, style, lineno + 1, messages)
3074 return [], 'Body', []
3075
3076 indent = text # indented title
3077
3078 def underline(self, match, context, next_state):
3079 overline = context[0]
3080 blocktext = overline + '\n' + self.state_machine.line
3081 lineno = self.state_machine.abs_line_number() - 1
3082 if len(overline.rstrip()) < 4:
3083 self.short_overline(context, blocktext, lineno, 1)
3084 msg = self.reporter.error(
3085 'Invalid section title or transition marker.',
3086 nodes.literal_block(blocktext, blocktext),
3087 line=lineno)
3088 self.parent += msg
3089 return [], 'Body', []
3090
3091 def short_overline(self, context, blocktext, lineno, lines=1) -> None:
3092 msg = self.reporter.info(
3093 'Possible incomplete section title.\nTreating the overline as '
3094 "ordinary text because it's so short.",
3095 line=lineno)
3096 self.parent += msg
3097 self.state_correction(context, lines)
3098
3099 def state_correction(self, context, lines=1):
3100 self.state_machine.previous_line(lines)
3101 context[:] = []
3102 raise statemachine.StateCorrection('Body', 'text')
3103
3104
3105class QuotedLiteralBlock(RSTState):
3106
3107 """
3108 Nested parse handler for quoted (unindented) literal blocks.
3109
3110 Special-purpose. Not for inclusion in `state_classes`.
3111 """
3112
3113 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
3114 'text': r''}
3115 initial_transitions = ('initial_quoted', 'text')
3116
3117 def __init__(self, state_machine, debug=False) -> None:
3118 RSTState.__init__(self, state_machine, debug)
3119 self.messages = []
3120 self.initial_lineno = None
3121
3122 def blank(self, match, context, next_state):
3123 if context:
3124 raise EOFError
3125 else:
3126 return context, next_state, []
3127
3128 def eof(self, context):
3129 if context:
3130 src, srcline = self.state_machine.get_source_and_line(
3131 self.initial_lineno)
3132 text = '\n'.join(context)
3133 literal_block = nodes.literal_block(text, text)
3134 literal_block.source = src
3135 literal_block.line = srcline
3136 self.parent += literal_block
3137 else:
3138 self.parent += self.reporter.warning(
3139 'Literal block expected; none found.',
3140 line=self.state_machine.abs_line_number()
3141 ) # src not available, statemachine.input_lines is empty
3142 self.state_machine.previous_line()
3143 self.parent += self.messages
3144 return []
3145
3146 def indent(self, match, context, next_state):
3147 assert context, ('QuotedLiteralBlock.indent: context should not '
3148 'be empty!')
3149 self.messages.append(
3150 self.reporter.error('Unexpected indentation.',
3151 line=self.state_machine.abs_line_number()))
3152 self.state_machine.previous_line()
3153 raise EOFError
3154
3155 def initial_quoted(self, match, context, next_state):
3156 """Match arbitrary quote character on the first line only."""
3157 self.remove_transition('initial_quoted')
3158 quote = match.string[0]
3159 pattern = re.compile(re.escape(quote))
3160 # New transition matches consistent quotes only:
3161 self.add_transition('quoted',
3162 (pattern, self.quoted, self.__class__.__name__))
3163 self.initial_lineno = self.state_machine.abs_line_number()
3164 return [match.string], next_state, []
3165
3166 def quoted(self, match, context, next_state):
3167 """Match consistent quotes on subsequent lines."""
3168 context.append(match.string)
3169 return context, next_state, []
3170
3171 def text(self, match, context, next_state):
3172 if context:
3173 self.messages.append(
3174 self.reporter.error('Inconsistent literal block quoting.',
3175 line=self.state_machine.abs_line_number()))
3176 self.state_machine.previous_line()
3177 raise EOFError
3178
3179
3180state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
3181 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
3182 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
3183"""Standard set of State classes used to start `RSTStateMachine`."""