# $Id$
# Author: David Goodger <goodger@python.org>
# Copyright: This module has been placed in the public domain.

"""
This is the ``docutils.parsers.rst.states`` module, the core of
the reStructuredText parser. It defines the following:

:Classes:
    - `RSTStateMachine`: reStructuredText parser's entry point.
    - `NestedStateMachine`: recursive StateMachine.
    - `RSTState`: reStructuredText State superclass.
    - `Inliner`: For parsing inline markup.
    - `Body`: Generic classifier of the first line of a block.
    - `SpecializedBody`: Superclass for compound element members.
    - `BulletList`: Second and subsequent bullet_list list_items.
    - `DefinitionList`: Second+ definition_list_items.
    - `EnumeratedList`: Second+ enumerated_list list_items.
    - `FieldList`: Second+ fields.
    - `OptionList`: Second+ option_list_items.
    - `RFC2822List`: Second+ RFC2822-style fields.
    - `ExtensionOptions`: Parses directive option fields.
    - `Explicit`: Second+ explicit markup constructs.
    - `SubstitutionDef`: For embedded directives in substitution definitions.
    - `Text`: Classifier of second line of a text block.
    - `SpecializedText`: Superclass for continuation lines of Text-variants.
    - `Definition`: Second line of potential definition_list_item.
    - `Line`: Second line of overlined section title or transition marker.
    - `Struct`: obsolete, use `types.SimpleNamespace`.

:Exception classes:
    - `MarkupError`
    - `ParserError`
    - `MarkupMismatch`

:Functions:
    - `escape2null()`: Return a string, escape-backslashes converted to nulls.
    - `unescape()`: Return a string, nulls removed or restored to backslashes.

:Attributes:
    - `state_classes`: set of State classes used with `RSTStateMachine`.

Parser Overview
===============

The reStructuredText parser is implemented as a recursive state machine,
examining its input one line at a time. To understand how the parser works,
please first become familiar with the `docutils.statemachine` module. In the
description below, references are made to classes defined in this module;
please see the individual classes for details.

Parsing proceeds as follows:

1. The state machine examines each line of input, checking each of the
   transition patterns of the state `Body`, in order, looking for a match.
   The implicit transitions (blank lines and indentation) are checked before
   any others. The 'text' transition is a catch-all (matches anything).

2. The method associated with the matched transition pattern is called.

   A. Some transition methods are self-contained, appending elements to the
      document tree (`Body.doctest` parses a doctest block). The parser's
      current line index is advanced to the end of the element, and parsing
      continues with step 1.

   B. Other transition methods trigger the creation of a nested state
      machine, whose job is to parse a compound construct ('indent' does a
      block quote, 'bullet' does a bullet list, 'overline' does a section
      [first checking for a valid section header], etc.).

      - In the case of lists and explicit markup, a one-off state machine is
        created and run to parse contents of the first item.

      - A new state machine is created and its initial state is set to the
        appropriate specialized state (`BulletList` in the case of the
        'bullet' transition; see `SpecializedBody` for more detail). This
        state machine is run to parse the compound element (or series of
        explicit markup elements), and returns as soon as a non-member
        element is encountered. For example, the `BulletList` state machine
        ends as soon as it encounters an element which is not a list item of
        that bullet list. The optional omission of inter-element blank lines
        is enabled by this nested state machine.

      - The current line index is advanced to the end of the elements
        parsed, and parsing continues with step 1.

   C. The result of the 'text' transition depends on the next line of text.
      The current state is changed to `Text`, under which the second line is
      examined. If the second line is:

      - Indented: The element is a definition list item, and parsing
        proceeds similarly to step 2.B, using the `DefinitionList` state.

      - A line of uniform punctuation characters: The element is a section
        header; again, parsing proceeds as in step 2.B, and `Body` is still
        used.

      - Anything else: The element is a paragraph, which is examined for
        inline markup and appended to the parent element. Processing
        continues with step 1.
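
In practice, `RSTStateMachine` is driven by `docutils.parsers.rst.Parser`.
A minimal, illustrative driver (the source name below is a placeholder)::

    from docutils.frontend import get_default_settings
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    parser = Parser()
    settings = get_default_settings(Parser)
    document = new_document('<sketch>', settings)
    parser.parse('A *short* example.', document)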
101"""
102
103from __future__ import annotations
104
105__docformat__ = 'reStructuredText'
106
107import copy
108import re
109from types import FunctionType, MethodType
110from types import SimpleNamespace as Struct
111
112from docutils import nodes, statemachine, utils
113from docutils import ApplicationError, DataError
114from docutils.statemachine import StateMachineWS, StateWS
115from docutils.nodes import fully_normalize_name as normalize_name
116from docutils.nodes import unescape, whitespace_normalize_name
117import docutils.parsers.rst
118from docutils.parsers.rst import directives, languages, tableparser, roles
119from docutils.utils import escape2null, column_width
120from docutils.utils import punctuation_chars, urischemes
121from docutils.utils import split_escaped_whitespace
122from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
123 RomanNumeral)
124
125TYPE_CHECKING = False
126if TYPE_CHECKING:
127 from docutils.statemachine import StringList
128
129
130class MarkupError(DataError): pass
131class UnknownInterpretedRoleError(DataError): pass
132class InterpretedRoleNotImplementedError(DataError): pass
133class ParserError(ApplicationError): pass
134class MarkupMismatch(Exception): pass
135
136
137class RSTStateMachine(StateMachineWS):
138
139 """
140 reStructuredText's master StateMachine.
141
142 The entry point to reStructuredText parsing is the `run()` method.
143 """
144
145 def run(self, input_lines, document, input_offset=0, match_titles=True,
146 inliner=None) -> None:
147 """
148 Parse `input_lines` and modify the `document` node in place.
149
150 Extend `StateMachineWS.run()`: set up parse-global data and
151 run the StateMachine.
152 """
153 self.language = languages.get_language(
154 document.settings.language_code, document.reporter)
155 self.match_titles = match_titles
156 if inliner is None:
157 inliner = Inliner()
158 inliner.init_customizations(document.settings)
159 # A collection of objects to share with nested parsers.
160 # The attributes `reporter`, `section_level`, and
161 # `section_bubble_up_kludge` will be removed in Docutils 2.0
162 self.memo = Struct(document=document,
163 reporter=document.reporter, # ignored
164 language=self.language,
165 title_styles=[],
166 section_level=0, # ignored
167 section_bubble_up_kludge=False, # ignored
168 inliner=inliner)
169 self.document = document
170 self.attach_observer(document.note_source)
171 self.reporter = self.document.reporter
172 self.node = document
173 results = StateMachineWS.run(self, input_lines, input_offset,
174 input_source=document['source'])
175 assert results == [], 'RSTStateMachine.run() results should be empty!'
176 self.node = self.memo = None # remove unneeded references
177
178
179class NestedStateMachine(StateMachineWS):
180 """
181 StateMachine run from within other StateMachine runs, to parse nested
182 document structures.
183 """
184
185 def run(self, input_lines, input_offset, memo, node, match_titles=True):
186 """
187 Parse `input_lines` and populate `node`.
188
189 Use a separate "title style hierarchy" (changed in Docutils 0.23).
190
191 Extend `StateMachineWS.run()`: set up document-wide data.
192 """
193 self.match_titles = match_titles
194 self.memo = copy.copy(memo)
195 self.document = memo.document
196 self.attach_observer(self.document.note_source)
197 self.language = memo.language
198 self.reporter = self.document.reporter
199 self.node = node
200 if match_titles:
201 # Use a separate section title style hierarchy;
202 # ensure all sections in the `input_lines` are treated as
203 # subsections of the current section by blocking lower
204 # section levels with a style that is impossible in rST:
205 self.memo.title_styles = ['x'] * len(node.section_hierarchy())
206 results = StateMachineWS.run(self, input_lines, input_offset)
207 assert results == [], ('NestedStateMachine.run() results should be '
208 'empty!')
209 return results
210
211
212class RSTState(StateWS):
213
214 """
215 reStructuredText State superclass.
216
217 Contains methods used by all State subclasses.
218 """
219
220 nested_sm = NestedStateMachine
221 nested_sm_cache = []
222
223 def __init__(self, state_machine, debug=False) -> None:
224 self.nested_sm_kwargs = {'state_classes': state_classes,
225 'initial_state': 'Body'}
226 StateWS.__init__(self, state_machine, debug)
227
228 def runtime_init(self) -> None:
229 StateWS.runtime_init(self)
230 memo = self.state_machine.memo
231 self.memo = memo
232 self.document = memo.document
233 self.inliner = memo.inliner
234 self.reporter = self.document.reporter
235 # enable the reporter to determine source and source-line
236 if not hasattr(self.reporter, 'get_source_and_line'):
237 self.reporter.get_source_and_line = self.state_machine.get_source_and_line # noqa:E501
238
239 @property
240 def parent(self) -> nodes.Element | None:
241 return self.state_machine.node
242
243 @parent.setter
244 def parent(self, value: nodes.Element):
245 self.state_machine.node = value
246
247 def goto_line(self, abs_line_offset) -> None:
248 """
249 Jump to input line `abs_line_offset`, ignoring jumps past the end.
250 """
251 try:
252 self.state_machine.goto_line(abs_line_offset)
253 except EOFError:
254 pass
255
256 def no_match(self, context, transitions):
257 """
258 Override `StateWS.no_match` to generate a system message.
259
260 This code should never be run.
261 """
262 self.reporter.severe(
263 'Internal error: no transition pattern match. State: "%s"; '
264 'transitions: %s; context: %s; current line: %r.'
265 % (self.__class__.__name__, transitions, context,
266 self.state_machine.line))
267 return context, None, []
268
269 def bof(self, context):
270 """Called at beginning of file."""
271 return [], []
272
273 def nested_parse(self,
274 block: StringList,
275 input_offset: int,
276 node: nodes.Element,
277 match_titles: bool = False,
278 state_machine_class: StateMachineWS|None = None,
279 state_machine_kwargs: dict|None = None
280 ) -> int:
281 """
282 Parse the input `block` with a nested state-machine rooted at `node`.
283
284 :block:
285 reStructuredText source extract.
286 :input_offset:
287 Line number at start of the block.
288 :node:
289 Base node. All generated nodes will be appended to this node.
290 :match_titles:
291 Allow section titles?
292 A separate section title style hierarchy is used for the nested
293 parsing (all sections are subsections of the current section).
294 The calling code should check whether sections are valid
295 children of the base node and move them or warn otherwise.
296 :state_machine_class:
297 Default: `NestedStateMachine`.
298 :state_machine_kwargs:
299 Keyword arguments for the state-machine instantiation.
300 Default: `self.nested_sm_kwargs`.
301
302 Create a new state-machine instance if required.
303 Return new offset.
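
        Sketch of a typical call from a directive's ``run()`` method
        (``self`` there is the directive; its ``state`` attribute is an
        `RSTState` instance)::

            container = nodes.container()
            self.state.nested_parse(self.content, self.content_offset,
                                    container)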
304 """
305 use_default = 0
306 if state_machine_class is None:
307 state_machine_class = self.nested_sm
308 use_default += 1
309 if state_machine_kwargs is None:
310 state_machine_kwargs = self.nested_sm_kwargs
311 use_default += 1
312 state_machine = None
313 if use_default == 2:
314 try:
315 state_machine = self.nested_sm_cache.pop()
316 except IndexError:
317 pass
318 if not state_machine:
319 state_machine = state_machine_class(debug=self.debug,
320 **state_machine_kwargs)
321 # run the statemachine and populate `node`:
322 block_length = len(block)
323 state_machine.run(block, input_offset, memo=self.memo,
324 node=node, match_titles=match_titles)
325 # clean up
326 if use_default == 2:
327 self.nested_sm_cache.append(state_machine)
328 else:
329 state_machine.unlink()
330 new_offset = state_machine.abs_line_offset()
331 # No `block.parent` implies disconnected -- lines aren't in sync:
332 if block.parent and (len(block) - block_length) != 0:
333 # Adjustment for block if modified in nested parse:
334 self.state_machine.next_line(len(block) - block_length)
335 return new_offset
336
337 def nested_list_parse(self, block, input_offset, node, initial_state,
338 blank_finish,
339 blank_finish_state=None,
340 extra_settings={},
341 match_titles=False,
342 state_machine_class=None,
343 state_machine_kwargs=None):
344 """
345 Parse the input `block` with a nested state-machine rooted at `node`.
346
347 Create a new StateMachine rooted at `node` and run it over the
348 input `block` (see also `nested_parse()`).
349 Also keep track of optional intermediate blank lines and the
350 required final one.
351
352 Return new offset and a boolean indicating whether there was a
353 blank final line.
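
        Sketch of a typical call (mirroring `Body.bullet()`; ``ul`` and the
        initial ``blank_finish`` come from the caller)::

            offset = self.state_machine.line_offset + 1
            new_offset, blank_finish = self.nested_list_parse(
                self.state_machine.input_lines[offset:],
                input_offset=self.state_machine.abs_line_offset() + 1,
                node=ul, initial_state='BulletList',
                blank_finish=blank_finish)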
354 """
355 if state_machine_class is None:
356 state_machine_class = self.nested_sm
357 if state_machine_kwargs is None:
358 state_machine_kwargs = self.nested_sm_kwargs.copy()
359 state_machine_kwargs['initial_state'] = initial_state
360 state_machine = state_machine_class(debug=self.debug,
361 **state_machine_kwargs)
362 if blank_finish_state is None:
363 blank_finish_state = initial_state
364 state_machine.states[blank_finish_state].blank_finish = blank_finish
365 for key, value in extra_settings.items():
366 setattr(state_machine.states[initial_state], key, value)
367 state_machine.run(block, input_offset, memo=self.memo,
368 node=node, match_titles=match_titles)
369 blank_finish = state_machine.states[blank_finish_state].blank_finish
370 state_machine.unlink()
371 return state_machine.abs_line_offset(), blank_finish
372
373 def section(self, title, source, style, lineno, messages) -> None:
374 """Check for a valid subsection and create one if it checks out."""
375 if self.check_subsection(source, style, lineno):
376 self.new_subsection(title, lineno, messages)
377
378 def check_subsection(self, source, style, lineno) -> bool:
379 """
380 Check for a valid subsection header. Update section data in `memo`.
381
382 When a new section is reached that isn't a subsection of the current
383 section, set `self.parent` to the new section's parent section
384 (or the root node if the new section is a top-level section).
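
        Worked example: with ``memo.title_styles == ['=', '-']``, a title
        underlined with ``'-'`` is a level-2 (sub)section; a not-yet-seen
        style would claim level 3 and is only accepted if the current
        section is at level 2 or deeper.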
385 """
386 title_styles = self.memo.title_styles
387 parent_sections = self.parent.section_hierarchy()
388 # current section level: (0 root, 1 section, 2 subsection, ...)
389 oldlevel = len(parent_sections)
390 # new section level:
391 try: # check for existing title style
392 newlevel = title_styles.index(style) + 1
393 except ValueError: # new title style
394 newlevel = len(title_styles) + 1
395 # The new level must not be deeper than an immediate child
396 # of the current level:
397 if newlevel > oldlevel + 1:
398 styles = ' '.join('/'.join(style) for style in title_styles)
399 self.parent += self.reporter.error(
400 'Inconsistent title style:'
401 f' skip from level {oldlevel} to {newlevel}.',
402 nodes.literal_block('', source),
403 nodes.paragraph('', f'Established title styles: {styles}'),
404 line=lineno)
405 return False
406 # Update parent state:
407 if newlevel > len(title_styles):
408 title_styles.append(style)
409 self.memo.section_level = newlevel
410 if newlevel <= oldlevel:
411 # new section is sibling or higher up in the section hierarchy
412 self.parent = parent_sections[newlevel-1].parent
413 return True
414
415 def title_inconsistent(self, sourcetext, lineno):
416 # Ignored. Will be removed in Docutils 2.0.
417 error = self.reporter.error(
418 'Title level inconsistent:', nodes.literal_block('', sourcetext),
419 line=lineno)
420 return error
421
422 def new_subsection(self, title, lineno, messages):
423 """Append new subsection to document tree."""
424 section_node = nodes.section()
425 self.parent += section_node
426 textnodes, title_messages = self.inline_text(title, lineno)
427 titlenode = nodes.title(title, '', *textnodes)
428 name = normalize_name(titlenode.astext())
429 section_node['names'].append(name)
430 section_node += titlenode
431 section_node += messages
432 section_node += title_messages
433 self.document.note_implicit_target(section_node, section_node)
434 # Update state:
435 self.parent = section_node
436
437 def paragraph(self, lines, lineno):
438 """
439 Return a list (paragraph & messages) & a boolean: literal_block next?
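
        Informal examples of the trailing ``::`` handling:

        * ``'Paragraph::'`` -- paragraph text ``'Paragraph:'``, literal next
        * ``'Paragraph: ::'`` -- paragraph text ``'Paragraph:'``, literal next
        * ``'::'`` -- no paragraph, literal next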
440 """
441 data = '\n'.join(lines).rstrip()
442 if re.search(r'(?<!\\)(\\\\)*::$', data):
443 if len(data) == 2:
444 return [], 1
445 elif data[-3] in ' \n':
446 text = data[:-3].rstrip()
447 else:
448 text = data[:-1]
449 literalnext = 1
450 else:
451 text = data
452 literalnext = 0
453 textnodes, messages = self.inline_text(text, lineno)
454 p = nodes.paragraph(data, '', *textnodes)
455 p.source, p.line = self.state_machine.get_source_and_line(lineno)
456 return [p] + messages, literalnext
457
458 def inline_text(self, text, lineno):
459 """
460 Return 2 lists: nodes (text and inline elements), and system_messages.
461 """
462 nodes, messages = self.inliner.parse(text, lineno,
463 self.memo, self.parent)
464 return nodes, messages
465
466 def unindent_warning(self, node_name):
467 # the actual problem is one line below the current line
468 lineno = self.state_machine.abs_line_number() + 1
469 return self.reporter.warning('%s ends without a blank line; '
470 'unexpected unindent.' % node_name,
471 line=lineno)
472
473
474def build_regexp(definition, compile_patterns=True):
475 """
476 Build, compile and return a regular expression based on `definition`.
477
478 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
479 where "parts" is a list of regular expressions and/or regular
480 expression definitions to be joined into an or-group.
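
    Illustrative example (not a pattern used by the parser)::

        build_regexp(('tag', '<', '>', ['em', 'strong']),
                     compile_patterns=False)
        # returns '<(?P<tag>em|strong)>'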
481 """
482 name, prefix, suffix, parts = definition
483 part_strings = []
484 for part in parts:
485 if isinstance(part, tuple):
486 part_strings.append(build_regexp(part, None))
487 else:
488 part_strings.append(part)
489 or_group = '|'.join(part_strings)
490 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
491 if compile_patterns:
492 return re.compile(regexp)
493 else:
494 return regexp
495
496
497class Inliner:
498
499 """
500 Parse inline markup; call the `parse()` method.
501 """
502
503 def __init__(self) -> None:
504 self.implicit_dispatch = []
505 """List of (pattern, bound method) tuples, used by
506 `self.implicit_inline`."""
507
508 def init_customizations(self, settings) -> None:
509 # lookahead and look-behind expressions for inline markup rules
510 if getattr(settings, 'character_level_inline_markup', False):
511 start_string_prefix = '(^|(?<!\x00))'
512 end_string_suffix = ''
513 else:
514 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
515 (punctuation_chars.openers,
516 punctuation_chars.delimiters))
517 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
518 (punctuation_chars.closing_delimiters,
519 punctuation_chars.delimiters,
520 punctuation_chars.closers))
521 args = locals().copy()
522 args.update(vars(self.__class__))
523
524 parts = ('initial_inline', start_string_prefix, '',
525 [
526 ('start', '', self.non_whitespace_after, # simple start-strings
527 [r'\*\*', # strong
528 r'\*(?!\*)', # emphasis but not strong
529 r'``', # literal
530 r'_`', # inline internal target
531 r'\|(?!\|)'] # substitution reference
532 ),
533 ('whole', '', end_string_suffix, # whole constructs
534 [ # reference name & end-string
535 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
536 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
537 [r'[0-9]+', # manually numbered
538 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)
539 r'\*', # auto-symbol
540 r'(?P<citationlabel>%s)' % self.simplename, # citation ref
541 ]
542 )
543 ]
544 ),
545 ('backquote', # interpreted text or phrase reference
546 '(?P<role>(:%s:)?)' % self.simplename, # optional role
547 self.non_whitespace_after,
548 ['`(?!`)'] # but not literal
549 )
550 ]
551 )
552 self.start_string_prefix = start_string_prefix
553 self.end_string_suffix = end_string_suffix
554 self.parts = parts
555
556 self.patterns = Struct(
557 initial=build_regexp(parts),
558 emphasis=re.compile(self.non_whitespace_escape_before
559 + r'(\*)' + end_string_suffix),
560 strong=re.compile(self.non_whitespace_escape_before
561 + r'(\*\*)' + end_string_suffix),
562 interpreted_or_phrase_ref=re.compile(
563 r"""
564 %(non_unescaped_whitespace_escape_before)s
565 (
566 `
567 (?P<suffix>
568 (?P<role>:%(simplename)s:)?
569 (?P<refend>__?)?
570 )
571 )
572 %(end_string_suffix)s
573 """ % args, re.VERBOSE),
574 embedded_link=re.compile(
575 r"""
576 (
577 (?:[ \n]+|^) # spaces or beginning of line/string
578 < # open bracket
579 %(non_whitespace_after)s
580 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
581 %(non_whitespace_escape_before)s
582 > # close bracket
583 )
584 $ # end of string
585 """ % args, re.VERBOSE),
586 literal=re.compile(self.non_whitespace_before + '(``)'
587 + end_string_suffix),
588 target=re.compile(self.non_whitespace_escape_before
589 + r'(`)' + end_string_suffix),
590 substitution_ref=re.compile(self.non_whitespace_escape_before
591 + r'(\|_{0,2})'
592 + end_string_suffix),
593 email=re.compile(self.email_pattern % args + '$',
594 re.VERBOSE),
595 uri=re.compile(
596 (r"""
597 %(start_string_prefix)s
598 (?P<whole>
599 (?P<absolute> # absolute URI
600 (?P<scheme> # scheme (http, ftp, mailto)
601 [a-zA-Z][a-zA-Z0-9.+-]*
602 )
603 :
604 (
605 ( # either:
606 (//?)? # hierarchical URI
607 %(uric)s* # URI characters
608 %(uri_end)s # final URI char
609 )
610 ( # optional query
611 \?%(uric)s*
612 %(uri_end)s
613 )?
614 ( # optional fragment
615 \#%(uric)s*
616 %(uri_end)s
617 )?
618 )
619 )
620 | # *OR*
621 (?P<email> # email address
622 """ + self.email_pattern + r"""
623 )
624 )
625 %(end_string_suffix)s
626 """) % args, re.VERBOSE),
627 pep=re.compile(
628 r"""
629 %(start_string_prefix)s
630 (
631 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
632 |
633 (PEP\s+(?P<pepnum2>\d+)) # reference by name
634 )
635 %(end_string_suffix)s""" % args, re.VERBOSE),
636 rfc=re.compile(
637 r"""
638 %(start_string_prefix)s
639 (RFC(-|\s+)?(?P<rfcnum>\d+))
640 %(end_string_suffix)s""" % args, re.VERBOSE))
641
642 self.implicit_dispatch.append((self.patterns.uri,
643 self.standalone_uri))
644 if settings.pep_references:
645 self.implicit_dispatch.append((self.patterns.pep,
646 self.pep_reference))
647 if settings.rfc_references:
648 self.implicit_dispatch.append((self.patterns.rfc,
649 self.rfc_reference))
650
651 def parse(self, text, lineno, memo, parent):
652 # Needs to be refactored for nested inline markup.
653 # Add nested_parse() method?
654 """
655 Return 2 lists: nodes (text and inline elements), and system_messages.
656
657 Using `self.patterns.initial`, a pattern which matches start-strings
658 (emphasis, strong, interpreted, phrase reference, literal,
659 substitution reference, and inline target) and complete constructs
660 (simple reference, footnote reference), search for a candidate. When
661 one is found, check for validity (e.g., not a quoted '*' character).
662 If valid, search for the corresponding end string if applicable, and
663 check it for validity. If not found or invalid, generate a warning
664 and ignore the start-string. Implicit inline markup (e.g. standalone
665 URIs) is found last.
666
667 :text: source string
668 :lineno: absolute line number, cf. `statemachine.get_source_and_line()`
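
        Informal example: for ``'Hello *world*'`` the first list holds a
        `nodes.Text` node ``'Hello '`` followed by a `nodes.emphasis`
        element wrapping ``'world'``; the second list is empty.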
669 """
670 self.document = memo.document
671 self.language = memo.language
672 self.reporter = self.document.reporter
673 self.parent = parent
674 pattern_search = self.patterns.initial.search
675 dispatch = self.dispatch
676 remaining = escape2null(text)
677 processed = []
678 unprocessed = []
679 messages = []
680 while remaining:
681 match = pattern_search(remaining)
682 if match:
683 groups = match.groupdict()
684 method = dispatch[groups['start'] or groups['backquote']
685 or groups['refend'] or groups['fnend']]
686 before, inlines, remaining, sysmessages = method(self, match,
687 lineno)
688 unprocessed.append(before)
689 messages += sysmessages
690 if inlines:
691 processed += self.implicit_inline(''.join(unprocessed),
692 lineno)
693 processed += inlines
694 unprocessed = []
695 else:
696 break
697 remaining = ''.join(unprocessed) + remaining
698 if remaining:
699 processed += self.implicit_inline(remaining, lineno)
700 return processed, messages
701
702 # Inline object recognition
703 # -------------------------
704 # See also init_customizations().
705 non_whitespace_before = r'(?<!\s)'
706 non_whitespace_escape_before = r'(?<![\s\x00])'
707 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
708 non_whitespace_after = r'(?!\s)'
709 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
710 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
711 # Valid URI characters (see RFC 2396 & RFC 2732);
712 # final \x00 allows backslash escapes in URIs:
713 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
714 # Delimiter indicating the end of a URI (not part of the URI):
715 uri_end_delim = r"""[>]"""
716 # Last URI character; same as uric but no punctuation:
717 urilast = r"""[_~*/=+a-zA-Z0-9]"""
718 # End of a URI (either 'urilast' or 'uric followed by a
719 # uri_end_delim'):
720 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
721 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
722 email_pattern = r"""
723 %(emailc)s+(?:\.%(emailc)s+)* # name
724 (?<!\x00)@ # at
725 %(emailc)s+(?:\.%(emailc)s*)* # host
726 %(uri_end)s # final URI char
727 """
728
729 def quoted_start(self, match):
730 """Test if inline markup start-string is 'quoted'.
731
        'Quoted' in this context means the start-string is enclosed in a pair
        of matching opening/closing delimiters (not necessarily quotes)
        or at the end of the text.
735 """
736 string = match.string
737 start = match.start()
738 if start == 0: # start-string at beginning of text
739 return False
740 prestart = string[start - 1]
741 try:
742 poststart = string[match.end()]
743 except IndexError: # start-string at end of text
744 return True # not "quoted" but no markup start-string either
745 return punctuation_chars.match_chars(prestart, poststart)
746
747 def inline_obj(self, match, lineno, end_pattern, nodeclass,
748 restore_backslashes=False):
749 string = match.string
750 matchstart = match.start('start')
751 matchend = match.end('start')
752 if self.quoted_start(match):
753 return string[:matchend], [], string[matchend:], [], ''
754 endmatch = end_pattern.search(string[matchend:])
755 if endmatch and endmatch.start(1): # 1 or more chars
756 text = endmatch.string[:endmatch.start(1)]
757 if restore_backslashes:
758 text = unescape(text, True)
759 textend = matchend + endmatch.end(1)
760 rawsource = unescape(string[matchstart:textend], True)
761 node = nodeclass(rawsource, text)
762 return (string[:matchstart], [node],
763 string[textend:], [], endmatch.group(1))
764 msg = self.reporter.warning(
765 'Inline %s start-string without end-string.'
766 % nodeclass.__name__, line=lineno)
767 text = unescape(string[matchstart:matchend], True)
768 prb = self.problematic(text, text, msg)
769 return string[:matchstart], [prb], string[matchend:], [msg], ''
770
771 def problematic(self, text, rawsource, message):
772 msgid = self.document.set_id(message, self.parent)
773 problematic = nodes.problematic(rawsource, text, refid=msgid)
774 prbid = self.document.set_id(problematic)
775 message.add_backref(prbid)
776 return problematic
777
778 def emphasis(self, match, lineno):
779 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
780 match, lineno, self.patterns.emphasis, nodes.emphasis)
781 return before, inlines, remaining, sysmessages
782
783 def strong(self, match, lineno):
784 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
785 match, lineno, self.patterns.strong, nodes.strong)
786 return before, inlines, remaining, sysmessages
787
788 def interpreted_or_phrase_ref(self, match, lineno):
789 end_pattern = self.patterns.interpreted_or_phrase_ref
790 string = match.string
791 matchstart = match.start('backquote')
792 matchend = match.end('backquote')
793 rolestart = match.start('role')
794 role = match.group('role')
795 position = ''
796 if role:
797 role = role[1:-1]
798 position = 'prefix'
799 elif self.quoted_start(match):
800 return string[:matchend], [], string[matchend:], []
801 endmatch = end_pattern.search(string[matchend:])
802 if endmatch and endmatch.start(1): # 1 or more chars
803 textend = matchend + endmatch.end()
804 if endmatch.group('role'):
805 if role:
806 msg = self.reporter.warning(
807 'Multiple roles in interpreted text (both '
808 'prefix and suffix present; only one allowed).',
809 line=lineno)
810 text = unescape(string[rolestart:textend], True)
811 prb = self.problematic(text, text, msg)
812 return string[:rolestart], [prb], string[textend:], [msg]
813 role = endmatch.group('suffix')[1:-1]
814 position = 'suffix'
815 escaped = endmatch.string[:endmatch.start(1)]
816 rawsource = unescape(string[matchstart:textend], True)
817 if rawsource[-1:] == '_':
818 if role:
819 msg = self.reporter.warning(
820 'Mismatch: both interpreted text role %s and '
821 'reference suffix.' % position, line=lineno)
822 text = unescape(string[rolestart:textend], True)
823 prb = self.problematic(text, text, msg)
824 return string[:rolestart], [prb], string[textend:], [msg]
825 return self.phrase_ref(string[:matchstart], string[textend:],
826 rawsource, escaped)
827 else:
828 rawsource = unescape(string[rolestart:textend], True)
829 nodelist, messages = self.interpreted(rawsource, escaped, role,
830 lineno)
831 return (string[:rolestart], nodelist,
832 string[textend:], messages)
833 msg = self.reporter.warning(
834 'Inline interpreted text or phrase reference start-string '
835 'without end-string.', line=lineno)
836 text = unescape(string[matchstart:matchend], True)
837 prb = self.problematic(text, text, msg)
838 return string[:matchstart], [prb], string[matchend:], [msg]
839
840 def phrase_ref(self, before, after, rawsource, escaped, text=None):
841 # `text` is ignored (since 0.16)
842 match = self.patterns.embedded_link.search(escaped)
843 if match: # embedded <URI> or <alias_>
844 text = escaped[:match.start(0)]
845 unescaped = unescape(text)
846 rawtext = unescape(text, True)
847 aliastext = match.group(2)
848 rawaliastext = unescape(aliastext, True)
849 underscore_escaped = rawaliastext.endswith(r'\_')
850 if (aliastext.endswith('_')
851 and not (underscore_escaped
852 or self.patterns.uri.match(aliastext))):
853 aliastype = 'name'
854 alias = normalize_name(unescape(aliastext[:-1]))
855 target = nodes.target(match.group(1), refname=alias)
856 target.indirect_reference_name = whitespace_normalize_name(
857 unescape(aliastext[:-1]))
858 else:
859 aliastype = 'uri'
860 # remove unescaped whitespace
861 alias_parts = split_escaped_whitespace(match.group(2))
862 alias = ' '.join(''.join(part.split())
863 for part in alias_parts)
864 alias = self.adjust_uri(unescape(alias))
865 if alias.endswith(r'\_'):
866 alias = alias[:-2] + '_'
867 target = nodes.target(match.group(1), refuri=alias)
868 target.referenced = 1
869 if not aliastext:
870 raise ApplicationError('problem with embedded link: %r'
871 % aliastext)
872 if not text:
873 text = alias
874 unescaped = unescape(text)
875 rawtext = rawaliastext
876 else:
877 text = escaped
878 unescaped = unescape(text)
879 target = None
880 rawtext = unescape(escaped, True)
881
882 refname = normalize_name(unescaped)
883 reference = nodes.reference(rawsource, text,
884 name=whitespace_normalize_name(unescaped))
885 reference[0].rawsource = rawtext
886
887 node_list = [reference]
888
889 if rawsource[-2:] == '__':
890 if target and (aliastype == 'name'):
891 reference['refname'] = alias
892 self.document.note_refname(reference)
893 # self.document.note_indirect_target(target) # required?
894 elif target and (aliastype == 'uri'):
895 reference['refuri'] = alias
896 else:
897 reference['anonymous'] = True
898 else:
899 if target:
900 target['names'].append(refname)
901 if aliastype == 'name':
902 reference['refname'] = alias
903 self.document.note_indirect_target(target)
904 self.document.note_refname(reference)
905 else:
906 reference['refuri'] = alias
907 # target.note_referenced_by(name=refname)
908 self.document.note_implicit_target(target, self.parent)
909 node_list.append(target)
910 else:
911 reference['refname'] = refname
912 self.document.note_refname(reference)
913 return before, node_list, after, []
914
915 def adjust_uri(self, uri):
916 match = self.patterns.email.match(uri)
917 if match:
918 return 'mailto:' + uri
919 else:
920 return uri
921
922 def interpreted(self, rawsource, text, role, lineno):
923 role_fn, messages = roles.role(role, self.language, lineno,
924 self.reporter)
925 if role_fn:
926 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
927 return nodes, messages + messages2
928 else:
929 msg = self.reporter.error(
930 'Unknown interpreted text role "%s".' % role,
931 line=lineno)
932 return ([self.problematic(rawsource, rawsource, msg)],
933 messages + [msg])
934
935 def literal(self, match, lineno):
936 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
937 match, lineno, self.patterns.literal, nodes.literal,
938 restore_backslashes=True)
939 return before, inlines, remaining, sysmessages
940
941 def inline_internal_target(self, match, lineno):
942 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
943 match, lineno, self.patterns.target, nodes.target)
944 if inlines and isinstance(inlines[0], nodes.target):
945 assert len(inlines) == 1
946 target = inlines[0]
947 name = normalize_name(target.astext())
948 target['names'].append(name)
949 self.document.note_explicit_target(target, self.parent)
950 return before, inlines, remaining, sysmessages
951
952 def substitution_reference(self, match, lineno):
953 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
954 match, lineno, self.patterns.substitution_ref,
955 nodes.substitution_reference)
956 if len(inlines) == 1:
957 subref_node = inlines[0]
958 if isinstance(subref_node, nodes.substitution_reference):
959 subref_text = subref_node.astext()
960 self.document.note_substitution_ref(subref_node, subref_text)
961 if endstring[-1:] == '_':
962 reference_node = nodes.reference(
963 '|%s%s' % (subref_text, endstring), '')
964 if endstring[-2:] == '__':
965 reference_node['anonymous'] = True
966 else:
967 reference_node['refname'] = normalize_name(subref_text)
968 self.document.note_refname(reference_node)
969 reference_node += subref_node
970 inlines = [reference_node]
971 return before, inlines, remaining, sysmessages
972
973 def footnote_reference(self, match, lineno):
974 """
975 Handles `nodes.footnote_reference` and `nodes.citation_reference`
976 elements.
977 """
978 label = match.group('footnotelabel')
979 refname = normalize_name(label)
980 string = match.string
981 before = string[:match.start('whole')]
982 remaining = string[match.end('whole'):]
983 if match.group('citationlabel'):
984 refnode = nodes.citation_reference('[%s]_' % label,
985 refname=refname)
986 refnode += nodes.Text(label)
987 self.document.note_citation_ref(refnode)
988 else:
989 refnode = nodes.footnote_reference('[%s]_' % label)
990 if refname[0] == '#':
991 refname = refname[1:]
992 refnode['auto'] = 1
993 self.document.note_autofootnote_ref(refnode)
994 elif refname == '*':
995 refname = ''
996 refnode['auto'] = '*'
997 self.document.note_symbol_footnote_ref(
998 refnode)
999 else:
1000 refnode += nodes.Text(label)
1001 if refname:
1002 refnode['refname'] = refname
1003 self.document.note_footnote_ref(refnode)
1004 if utils.get_trim_footnote_ref_space(self.document.settings):
1005 before = before.rstrip()
1006 return before, [refnode], remaining, []
1007
1008 def reference(self, match, lineno, anonymous=False):
1009 referencename = match.group('refname')
1010 refname = normalize_name(referencename)
1011 referencenode = nodes.reference(
1012 referencename + match.group('refend'), referencename,
1013 name=whitespace_normalize_name(referencename))
1014 referencenode[0].rawsource = referencename
1015 if anonymous:
1016 referencenode['anonymous'] = True
1017 else:
1018 referencenode['refname'] = refname
1019 self.document.note_refname(referencenode)
1020 string = match.string
1021 matchstart = match.start('whole')
1022 matchend = match.end('whole')
1023 return string[:matchstart], [referencenode], string[matchend:], []
1024
1025 def anonymous_reference(self, match, lineno):
1026 return self.reference(match, lineno, anonymous=True)
1027
1028 def standalone_uri(self, match, lineno):
1029 if (not match.group('scheme')
1030 or match.group('scheme').lower() in urischemes.schemes):
1031 if match.group('email'):
1032 addscheme = 'mailto:'
1033 else:
1034 addscheme = ''
1035 text = match.group('whole')
1036 refuri = addscheme + unescape(text)
1037 reference = nodes.reference(unescape(text, True), text,
1038 refuri=refuri)
1039 return [reference]
1040 else: # not a valid scheme
1041 raise MarkupMismatch
1042
1043 def pep_reference(self, match, lineno):
1044 text = match.group(0)
1045 if text.startswith('pep-'):
1046 pepnum = int(unescape(match.group('pepnum1')))
1047 elif text.startswith('PEP'):
1048 pepnum = int(unescape(match.group('pepnum2')))
1049 else:
1050 raise MarkupMismatch
1051 ref = (self.document.settings.pep_base_url
1052 + self.document.settings.pep_file_url_template % pepnum)
1053 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1054
1055 rfc_url = 'rfc%d.html'
1056
1057 def rfc_reference(self, match, lineno):
1058 text = match.group(0)
1059 if text.startswith('RFC'):
1060 rfcnum = int(unescape(match.group('rfcnum')))
1061 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
1062 else:
1063 raise MarkupMismatch
1064 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1065
1066 def implicit_inline(self, text, lineno):
1067 """
1068 Check each of the patterns in `self.implicit_dispatch` for a match,
1069 and dispatch to the stored method for the pattern. Recursively check
1070 the text before and after the match. Return a list of `nodes.Text`
1071 and inline element nodes.
1072 """
1073 if not text:
1074 return []
1075 for pattern, method in self.implicit_dispatch:
1076 match = pattern.search(text)
1077 if match:
1078 try:
1079 # Must recurse on strings before *and* after the match;
1080 # there may be multiple patterns.
1081 return (self.implicit_inline(text[:match.start()], lineno)
1082 + method(match, lineno)
1083 + self.implicit_inline(text[match.end():], lineno))
1084 except MarkupMismatch:
1085 pass
1086 return [nodes.Text(text)]
1087
1088 dispatch = {'*': emphasis,
1089 '**': strong,
1090 '`': interpreted_or_phrase_ref,
1091 '``': literal,
1092 '_`': inline_internal_target,
1093 ']_': footnote_reference,
1094 '|': substitution_reference,
1095 '_': reference,
1096 '__': anonymous_reference}
1097
1098
1099def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1100 return ord(s) - _zero
1101
1102
1103def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1104 return ord(s) - _zero
1105
1106
1107class Body(RSTState):
1108
1109 """
1110 Generic classifier of the first line of a block.
1111 """
1112
1113 double_width_pad_char = tableparser.TableParser.double_width_pad_char
1114 """Padding character for East Asian double-width text."""
1115
1116 enum = Struct()
1117 """Enumerated list parsing information."""
1118
1119 enum.formatinfo = {
1120 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1121 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1122 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1123 enum.formats = enum.formatinfo.keys()
1124 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1125 'lowerroman', 'upperroman'] # ORDERED!
1126 enum.sequencepats = {'arabic': '[0-9]+',
1127 'loweralpha': '[a-z]',
1128 'upperalpha': '[A-Z]',
1129 'lowerroman': '[ivxlcdm]+',
1130 'upperroman': '[IVXLCDM]+'}
1131 enum.converters = {'arabic': int,
1132 'loweralpha': _loweralpha_to_int,
1133 'upperalpha': _upperalpha_to_int,
1134 'lowerroman': RomanNumeral.from_string,
1135 'upperroman': RomanNumeral.from_string}
1136
1137 enum.sequenceregexps = {}
1138 for sequence in enum.sequences:
1139 enum.sequenceregexps[sequence] = re.compile(
1140 enum.sequencepats[sequence] + '$')
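
    # Informal examples: enum.converters['upperalpha']('C') -> 3 and
    # enum.converters['arabic']('10') -> 10; the Roman sequences return
    # `RomanNumeral` instances, which `parse_enumerator()` passes to int().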
1141
1142 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1143 """Matches the top (& bottom) of a full table)."""
1144
1145 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1146 """Matches the top of a simple table."""
1147
1148 simple_table_border_pat = re.compile('=+[ =]*$')
1149 """Matches the bottom & header bottom of a simple table."""
1150
1151 pats = {}
1152 """Fragments of patterns used by transitions."""
1153
1154 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1155 pats['alpha'] = '[a-zA-Z]'
1156 pats['alphanum'] = '[a-zA-Z0-9]'
1157 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1158 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1159 '|%(upperroman)s|#)' % enum.sequencepats)
1160 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1161 # @@@ Loosen up the pattern? Allow Unicode?
1162 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1163 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1164 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1165 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1166
1167 for format in enum.formats:
1168 pats[format] = '(?P<%s>%s%s%s)' % (
1169 format, re.escape(enum.formatinfo[format].prefix),
1170 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1171
1172 patterns = {
1173 'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
1174 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1175 'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
1176 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1177 'doctest': r'>>>( +|$)',
1178 'line_block': r'\|( +|$)',
1179 'grid_table_top': grid_table_top_pat,
1180 'simple_table_top': simple_table_top_pat,
1181 'explicit_markup': r'\.\.( +|$)',
1182 'anonymous': r'__( +|$)',
1183 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1184 'text': r''}
1185 initial_transitions = (
1186 'bullet',
1187 'enumerator',
1188 'field_marker',
1189 'option_marker',
1190 'doctest',
1191 'line_block',
1192 'grid_table_top',
1193 'simple_table_top',
1194 'explicit_markup',
1195 'anonymous',
1196 'line',
1197 'text')
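
    # For example, a line '* item' is picked up by the 'bullet' transition,
    # '1. item' by 'enumerator', ':name: value' by 'field_marker', and
    # '>>> 1 + 1' by 'doctest'; 'text' matches any line not claimed earlier.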
1198
1199 def indent(self, match, context, next_state):
1200 """Block quote."""
1201 (indented, indent, line_offset, blank_finish
1202 ) = self.state_machine.get_indented()
1203 elements = self.block_quote(indented, line_offset)
1204 self.parent += elements
1205 if not blank_finish:
1206 self.parent += self.unindent_warning('Block quote')
1207 return context, next_state, []
1208
1209 def block_quote(self, indented, line_offset):
1210 elements = []
1211 while indented:
1212 blockquote = nodes.block_quote(rawsource='\n'.join(indented))
1213 (blockquote.source, blockquote.line
1214 ) = self.state_machine.get_source_and_line(line_offset+1)
1215 (blockquote_lines,
1216 attribution_lines,
1217 attribution_offset,
1218 indented,
1219 new_line_offset) = self.split_attribution(indented, line_offset)
1220 self.nested_parse(blockquote_lines, line_offset, blockquote)
1221 elements.append(blockquote)
1222 if attribution_lines:
1223 attribution, messages = self.parse_attribution(
1224 attribution_lines, line_offset+attribution_offset)
1225 blockquote += attribution
1226 elements += messages
1227 line_offset = new_line_offset
1228 while indented and not indented[0]:
1229 indented = indented[1:]
1230 line_offset += 1
1231 return elements
1232
1233 # U+2014 is an em-dash:
1234 attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
1235
1236 def split_attribution(self, indented, line_offset):
1237 """
1238 Check for a block quote attribution and split it off:
1239
1240 * First line after a blank line must begin with a dash ("--", "---",
1241 em-dash; matches `self.attribution_pattern`).
1242 * Every line after that must have consistent indentation.
1243 * Attributions must be preceded by block quote content.
1244
1245 Return a tuple of: (block quote content lines, attribution lines,
1246 attribution offset, remaining indented lines, remaining lines offset).
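
        Informal example of `indented` content that yields an attribution::

            This is the block quote content.

            -- Attribution line,
               continued with consistent indentation.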
1247 """
1248 blank = None
1249 nonblank_seen = False
1250 for i in range(len(indented)):
1251 line = indented[i].rstrip()
1252 if line:
1253 if nonblank_seen and blank == i - 1: # last line blank
1254 match = self.attribution_pattern.match(line)
1255 if match:
1256 attribution_end, indent = self.check_attribution(
1257 indented, i)
1258 if attribution_end:
1259 a_lines = indented[i:attribution_end]
1260 a_lines.trim_left(match.end(), end=1)
1261 a_lines.trim_left(indent, start=1)
1262 return (indented[:i], a_lines,
1263 i, indented[attribution_end:],
1264 line_offset + attribution_end)
1265 nonblank_seen = True
1266 else:
1267 blank = i
1268 else:
1269 return indented, None, None, None, None
1270
1271 def check_attribution(self, indented, attribution_start):
1272 """
1273 Check attribution shape.
1274 Return the index past the end of the attribution, and the indent.
1275 """
1276 indent = None
1277 i = attribution_start + 1
1278 for i in range(attribution_start + 1, len(indented)):
1279 line = indented[i].rstrip()
1280 if not line:
1281 break
1282 if indent is None:
1283 indent = len(line) - len(line.lstrip())
1284 elif len(line) - len(line.lstrip()) != indent:
1285 return None, None # bad shape; not an attribution
1286 else:
1287 # return index of line after last attribution line:
1288 i += 1
1289 return i, (indent or 0)
1290
1291 def parse_attribution(self, indented, line_offset):
1292 text = '\n'.join(indented).rstrip()
1293 lineno = 1 + line_offset # line_offset is zero-based
1294 textnodes, messages = self.inline_text(text, lineno)
1295 node = nodes.attribution(text, '', *textnodes)
1296 node.source, node.line = self.state_machine.get_source_and_line(lineno)
1297 return node, messages
1298
1299 def bullet(self, match, context, next_state):
1300 """Bullet list item."""
1301 ul = nodes.bullet_list()
1302 ul.source, ul.line = self.state_machine.get_source_and_line()
1303 self.parent += ul
1304 ul['bullet'] = match.string[0]
1305 i, blank_finish = self.list_item(match.end())
1306 ul += i
1307 offset = self.state_machine.line_offset + 1 # next line
1308 new_line_offset, blank_finish = self.nested_list_parse(
1309 self.state_machine.input_lines[offset:],
1310 input_offset=self.state_machine.abs_line_offset() + 1,
1311 node=ul, initial_state='BulletList',
1312 blank_finish=blank_finish)
1313 self.goto_line(new_line_offset)
1314 if not blank_finish:
1315 self.parent += self.unindent_warning('Bullet list')
1316 return [], next_state, []
1317
1318 def list_item(self, indent):
1319 src, srcline = self.state_machine.get_source_and_line()
1320 if self.state_machine.line[indent:]:
1321 indented, line_offset, blank_finish = (
1322 self.state_machine.get_known_indented(indent))
1323 else:
1324 indented, indent, line_offset, blank_finish = (
1325 self.state_machine.get_first_known_indented(indent))
1326 listitem = nodes.list_item('\n'.join(indented))
1327 listitem.source, listitem.line = src, srcline
1328 if indented:
1329 self.nested_parse(indented, input_offset=line_offset,
1330 node=listitem)
1331 return listitem, blank_finish
1332
1333 def enumerator(self, match, context, next_state):
1334 """Enumerated List Item"""
1335 format, sequence, text, ordinal = self.parse_enumerator(match)
1336 if not self.is_enumerated_list_item(ordinal, sequence, format):
1337 raise statemachine.TransitionCorrection('text')
1338 enumlist = nodes.enumerated_list()
1339 (enumlist.source,
1340 enumlist.line) = self.state_machine.get_source_and_line()
1341 self.parent += enumlist
1342 if sequence == '#':
1343 enumlist['enumtype'] = 'arabic'
1344 else:
1345 enumlist['enumtype'] = sequence
1346 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1347 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1348 if ordinal != 1:
1349 enumlist['start'] = ordinal
1350 msg = self.reporter.info(
1351 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1352 % (text, ordinal), base_node=enumlist)
1353 self.parent += msg
1354 listitem, blank_finish = self.list_item(match.end())
1355 enumlist += listitem
1356 offset = self.state_machine.line_offset + 1 # next line
1357 newline_offset, blank_finish = self.nested_list_parse(
1358 self.state_machine.input_lines[offset:],
1359 input_offset=self.state_machine.abs_line_offset() + 1,
1360 node=enumlist, initial_state='EnumeratedList',
1361 blank_finish=blank_finish,
1362 extra_settings={'lastordinal': ordinal,
1363 'format': format,
1364 'auto': sequence == '#'})
1365 self.goto_line(newline_offset)
1366 if not blank_finish:
1367 self.parent += self.unindent_warning('Enumerated list')
1368 return [], next_state, []
1369
1370 def parse_enumerator(self, match, expected_sequence=None):
1371 """
1372 Analyze an enumerator and return the results.
1373
1374 :Return:
1375 - the enumerator format ('period', 'parens', or 'rparen'),
1376 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1377 - the text of the enumerator, stripped of formatting, and
1378 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1379 ``None`` is returned for invalid enumerator text).
1380
1381 The enumerator format has already been determined by the regular
1382 expression match. If `expected_sequence` is given, that sequence is
1383 tried first. If not, we check for Roman numeral 1. This way,
1384 single-character Roman numerals (which are also alphabetical) can be
1385 matched. If no sequence has been matched, all sequences are checked in
1386 order.
1387 """
1388 groupdict = match.groupdict()
1389 sequence = ''
1390 for format in self.enum.formats:
1391 if groupdict[format]: # was this the format matched?
1392 break # yes; keep `format`
1393 else: # shouldn't happen
1394 raise ParserError('enumerator format not matched')
1395 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1396 : self.enum.formatinfo[format].end]
1397 if text == '#':
1398 sequence = '#'
1399 elif expected_sequence:
1400 try:
1401 if self.enum.sequenceregexps[expected_sequence].match(text):
1402 sequence = expected_sequence
1403 except KeyError: # shouldn't happen
1404 raise ParserError('unknown enumerator sequence: %s'
1405 % sequence)
1406 elif text == 'i':
1407 sequence = 'lowerroman'
1408 elif text == 'I':
1409 sequence = 'upperroman'
1410 if not sequence:
1411 for sequence in self.enum.sequences:
1412 if self.enum.sequenceregexps[sequence].match(text):
1413 break
1414 else: # shouldn't happen
1415 raise ParserError('enumerator sequence not matched')
1416 if sequence == '#':
1417 ordinal = 1
1418 else:
1419 try:
1420 ordinal = int(self.enum.converters[sequence](text))
1421 except InvalidRomanNumeralError:
1422 ordinal = None
1423 return format, sequence, text, ordinal
1424
1425 def is_enumerated_list_item(self, ordinal, sequence, format):
1426 """
1427 Check validity based on the ordinal value and the second line.
1428
1429 Return true if the ordinal is valid and the second line is blank,
1430 indented, or starts with the next enumerator or an auto-enumerator.
1431 """
1432 if ordinal is None:
1433 return None
1434 try:
1435 next_line = self.state_machine.next_line()
1436 except EOFError: # end of input lines
1437 self.state_machine.previous_line()
1438 return 1
1439 else:
1440 self.state_machine.previous_line()
1441 if not next_line[:1].strip(): # blank or indented
1442 return 1
1443 result = self.make_enumerator(ordinal + 1, sequence, format)
1444 if result:
1445 next_enumerator, auto_enumerator = result
1446 try:
1447 if next_line.startswith((next_enumerator, auto_enumerator)):
1448 return 1
1449 except TypeError:
1450 pass
1451 return None
1452
1453 def make_enumerator(self, ordinal, sequence, format):
1454 """
1455 Construct and return the next enumerated list item marker, and an
1456 auto-enumerator ("#" instead of the regular enumerator).
1457
1458 Return ``None`` for invalid (out of range) ordinals.
1459 """
1460 if sequence == '#':
1461 enumerator = '#'
1462 elif sequence == 'arabic':
1463 enumerator = str(ordinal)
1464 else:
1465 if sequence.endswith('alpha'):
1466 if ordinal > 26:
1467 return None
1468 enumerator = chr(ordinal + ord('a') - 1)
1469 elif sequence.endswith('roman'):
1470 try:
1471 enumerator = RomanNumeral(ordinal).to_uppercase()
1472 except TypeError:
1473 return None
1474 else: # shouldn't happen
1475 raise ParserError('unknown enumerator sequence: "%s"'
1476 % sequence)
1477 if sequence.startswith('lower'):
1478 enumerator = enumerator.lower()
1479 elif sequence.startswith('upper'):
1480 enumerator = enumerator.upper()
1481 else: # shouldn't happen
1482 raise ParserError('unknown enumerator sequence: "%s"'
1483 % sequence)
1484 formatinfo = self.enum.formatinfo[format]
1485 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1486 + ' ')
1487 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1488 return next_enumerator, auto_enumerator
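
    # Informal examples (not executed): make_enumerator(3, 'loweralpha',
    # 'parens') returns ('(c) ', '(#) '); make_enumerator(4, 'upperroman',
    # 'period') returns ('IV. ', '#. ').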
1489
1490 def field_marker(self, match, context, next_state):
1491 """Field list item."""
1492 field_list = nodes.field_list()
1493 self.parent += field_list
1494 field, blank_finish = self.field(match)
1495 field_list += field
1496 offset = self.state_machine.line_offset + 1 # next line
1497 newline_offset, blank_finish = self.nested_list_parse(
1498 self.state_machine.input_lines[offset:],
1499 input_offset=self.state_machine.abs_line_offset() + 1,
1500 node=field_list, initial_state='FieldList',
1501 blank_finish=blank_finish)
1502 self.goto_line(newline_offset)
1503 if not blank_finish:
1504 self.parent += self.unindent_warning('Field list')
1505 return [], next_state, []
1506
1507 def field(self, match):
1508 name = self.parse_field_marker(match)
1509 src, srcline = self.state_machine.get_source_and_line()
1510 lineno = self.state_machine.abs_line_number()
1511 (indented, indent, line_offset, blank_finish
1512 ) = self.state_machine.get_first_known_indented(match.end())
1513 field_node = nodes.field()
1514 field_node.source = src
1515 field_node.line = srcline
1516 name_nodes, name_messages = self.inline_text(name, lineno)
1517 field_node += nodes.field_name(name, '', *name_nodes)
1518 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1519 field_node += field_body
1520 if indented:
1521 self.parse_field_body(indented, line_offset, field_body)
1522 return field_node, blank_finish
1523
1524 def parse_field_marker(self, match):
1525 """Extract & return field name from a field marker match."""
1526 field = match.group()[1:] # strip off leading ':'
1527 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1528 return field
1529
1530 def parse_field_body(self, indented, offset, node) -> None:
1531 self.nested_parse(indented, input_offset=offset, node=node)
1532
1533 def option_marker(self, match, context, next_state):
1534 """Option list item."""
1535 optionlist = nodes.option_list()
1536 (optionlist.source, optionlist.line
1537 ) = self.state_machine.get_source_and_line()
1538 try:
1539 listitem, blank_finish = self.option_list_item(match)
1540 except MarkupError as error:
1541 # This shouldn't happen; pattern won't match.
1542 msg = self.reporter.error('Invalid option list marker: %s'
1543 % error)
1544 self.parent += msg
1545 (indented, indent, line_offset, blank_finish
1546 ) = self.state_machine.get_first_known_indented(match.end())
1547 elements = self.block_quote(indented, line_offset)
1548 self.parent += elements
1549 if not blank_finish:
1550 self.parent += self.unindent_warning('Option list')
1551 return [], next_state, []
1552 self.parent += optionlist
1553 optionlist += listitem
1554 offset = self.state_machine.line_offset + 1 # next line
1555 newline_offset, blank_finish = self.nested_list_parse(
1556 self.state_machine.input_lines[offset:],
1557 input_offset=self.state_machine.abs_line_offset() + 1,
1558 node=optionlist, initial_state='OptionList',
1559 blank_finish=blank_finish)
1560 self.goto_line(newline_offset)
1561 if not blank_finish:
1562 self.parent += self.unindent_warning('Option list')
1563 return [], next_state, []
1564
1565 def option_list_item(self, match):
1566 offset = self.state_machine.abs_line_offset()
1567 options = self.parse_option_marker(match)
1568 (indented, indent, line_offset, blank_finish
1569 ) = self.state_machine.get_first_known_indented(match.end())
1570 if not indented: # not an option list item
1571 self.goto_line(offset)
1572 raise statemachine.TransitionCorrection('text')
1573 option_group = nodes.option_group('', *options)
1574 description = nodes.description('\n'.join(indented))
1575 option_list_item = nodes.option_list_item('', option_group,
1576 description)
1577 if indented:
1578 self.nested_parse(indented, input_offset=line_offset,
1579 node=description)
1580 return option_list_item, blank_finish
1581
1582 def parse_option_marker(self, match):
1583 """
1584 Return a list of `nodes.option` and `nodes.option_argument` objects,
1585 parsed from an option marker match.
1586
1587 :Exception: `MarkupError` for invalid option markers.
1588 """
1589 optlist = []
1590 # split at ", ", except inside < > (complex arguments)
1591 optionstrings = re.split(r', (?![^<]*>)', match.group().rstrip())
1592 for optionstring in optionstrings:
1593 tokens = optionstring.split()
1594 delimiter = ' '
1595 firstopt = tokens[0].split('=', 1)
1596 if len(firstopt) > 1:
1597 # "--opt=value" form
1598 tokens[:1] = firstopt
1599 delimiter = '='
1600 elif (len(tokens[0]) > 2
1601 and ((tokens[0].startswith('-')
1602 and not tokens[0].startswith('--'))
1603 or tokens[0].startswith('+'))):
1604 # "-ovalue" form
1605 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1606 delimiter = ''
1607 if len(tokens) > 1 and (tokens[1].startswith('<')
1608 and tokens[-1].endswith('>')):
1609 # "-o <value1 value2>" form; join all values into one token
1610 tokens[1:] = [' '.join(tokens[1:])]
1611 if 0 < len(tokens) <= 2:
1612 option = nodes.option(optionstring)
1613 option += nodes.option_string(tokens[0], tokens[0])
1614 if len(tokens) > 1:
1615 option += nodes.option_argument(tokens[1], tokens[1],
1616 delimiter=delimiter)
1617 optlist.append(option)
1618 else:
1619 raise MarkupError(
1620 'wrong number of option tokens (=%s), should be 1 or 2: '
1621 '"%s"' % (len(tokens), optionstring))
1622 return optlist
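# Illustrative examples of how the loop above tokenizes option markers:
#
#     "-a"              -> option_string "-a"
#     "--long=value"    -> option_string "--long", option_argument "value",
#                          delimiter "="
#     "-ovalue"         -> option_string "-o", option_argument "value",
#                          delimiter ""
#     "-o <file name>"  -> option_string "-o", option_argument "<file name>"
#                          (values in angle brackets are joined into one token)
#     "-a, -b FILE"     -> two option elements, split at ", "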
1623
1624 def doctest(self, match, context, next_state):
1625 line = self.document.current_line
1626 data = '\n'.join(self.state_machine.get_text_block())
1627 # TODO: Parse with `directives.body.CodeBlock` with
1628 # argument 'pycon' (Python Console) in Docutils 1.0.
1629 n = nodes.doctest_block(data, data)
1630 n.line = line
1631 self.parent += n
1632 return [], next_state, []
1633
1634 def line_block(self, match, context, next_state):
1635 """First line of a line block."""
1636 block = nodes.line_block()
1637 self.parent += block
1638 lineno = self.state_machine.abs_line_number()
1639 (block.source,
1640 block.line) = self.state_machine.get_source_and_line(lineno)
1641 line, messages, blank_finish = self.line_block_line(match, lineno)
1642 block += line
1643 self.parent += messages
1644 if not blank_finish:
1645 offset = self.state_machine.line_offset + 1 # next line
1646 new_line_offset, blank_finish = self.nested_list_parse(
1647 self.state_machine.input_lines[offset:],
1648 input_offset=self.state_machine.abs_line_offset() + 1,
1649 node=block, initial_state='LineBlock',
1650 blank_finish=False)
1651 self.goto_line(new_line_offset)
1652 if not blank_finish:
1653 self.parent += self.reporter.warning(
1654 'Line block ends without a blank line.',
1655 line=lineno+1)
1656 if len(block):
1657 if block[0].indent is None:
1658 block[0].indent = 0
1659 self.nest_line_block_lines(block)
1660 return [], next_state, []
1661
1662 def line_block_line(self, match, lineno):
1663 """Return one `line` element of a line_block."""
1664 (indented, indent, line_offset, blank_finish
1665 ) = self.state_machine.get_first_known_indented(match.end(),
1666 until_blank=True)
1667 text = '\n'.join(indented)
1668 text_nodes, messages = self.inline_text(text, lineno)
1669 line = nodes.line(text, '', *text_nodes)
1670 (line.source,
1671 line.line) = self.state_machine.get_source_and_line(lineno)
1672 if match.string.rstrip() != '|': # not empty
1673 line.indent = len(match.group(1)) - 1
1674 return line, messages, blank_finish
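# Example, assuming the 'line_block' pattern captures the whitespace after
# the "|" as group 1: "|   A quoted line" yields ``line.indent == 2`` (three
# captured spaces minus one), while a bare "|" leaves ``line.indent`` unset
# (None) so `nest_line_block_lines` can inherit the previous line's indent.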
1675
1676 def nest_line_block_lines(self, block) -> None:
1677 for index in range(1, len(block)):
1678 if block[index].indent is None:
1679 block[index].indent = block[index - 1].indent
1680 self.nest_line_block_segment(block)
1681
1682 def nest_line_block_segment(self, block) -> None:
1683 indents = [item.indent for item in block]
1684 least = min(indents)
1685 new_items = []
1686 new_block = nodes.line_block()
1687 for item in block:
1688 if item.indent > least:
1689 new_block.append(item)
1690 else:
1691 if len(new_block):
1692 self.nest_line_block_segment(new_block)
1693 new_items.append(new_block)
1694 new_block = nodes.line_block()
1695 new_items.append(item)
1696 if len(new_block):
1697 self.nest_line_block_segment(new_block)
1698 new_items.append(new_block)
1699 block[:] = new_items
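# Worked example of the nesting above: sibling ``line`` elements with indents
# [0, 2, 2, 0] become
#
#     line_block
#         line (indent 0)
#         line_block
#             line (indent 2)
#             line (indent 2)
#         line (indent 0)
#
# with the recursion repeating inside each new child block for deeper indents.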
1700
1701 def grid_table_top(self, match, context, next_state):
1702 """Top border of a full table."""
1703 return self.table_top(match, context, next_state,
1704 self.isolate_grid_table,
1705 tableparser.GridTableParser)
1706
1707 def simple_table_top(self, match, context, next_state):
1708 """Top border of a simple table."""
1709 return self.table_top(match, context, next_state,
1710 self.isolate_simple_table,
1711 tableparser.SimpleTableParser)
1712
1713 def table_top(self, match, context, next_state,
1714 isolate_function, parser_class):
1715 """Top border of a generic table."""
1716 nodelist, blank_finish = self.table(isolate_function, parser_class)
1717 self.parent += nodelist
1718 if not blank_finish:
1719 msg = self.reporter.warning(
1720 'Blank line required after table.',
1721 line=self.state_machine.abs_line_number()+1)
1722 self.parent += msg
1723 return [], next_state, []
1724
1725 def table(self, isolate_function, parser_class):
1726 """Parse a table."""
1727 block, messages, blank_finish = isolate_function()
1728 if block:
1729 try:
1730 parser = parser_class()
1731 tabledata = parser.parse(block)
1732 tableline = (self.state_machine.abs_line_number() - len(block)
1733 + 1)
1734 table = self.build_table(tabledata, tableline)
1735 nodelist = [table] + messages
1736 except tableparser.TableMarkupError as err:
1737 nodelist = self.malformed_table(block, ' '.join(err.args),
1738 offset=err.offset) + messages
1739 else:
1740 nodelist = messages
1741 return nodelist, blank_finish
1742
1743 def isolate_grid_table(self):
1744 messages = []
1745 blank_finish = True
1746 try:
1747 block = self.state_machine.get_text_block(flush_left=True)
1748 except statemachine.UnexpectedIndentationError as err:
1749 block, src, srcline = err.args
1750 messages.append(self.reporter.error('Unexpected indentation.',
1751 source=src, line=srcline))
1752 blank_finish = False
1753 block.disconnect()
1754 # for East Asian chars:
1755 block.pad_double_width(self.double_width_pad_char)
1756 width = len(block[0].strip())
1757 for i in range(len(block)):
1758 block[i] = block[i].strip()
1759 if block[i][0] not in '+|': # check left edge
1760 blank_finish = False
1761 self.state_machine.previous_line(len(block) - i)
1762 del block[i:]
1763 break
1764 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1765 # from second-last to third line of table:
1766 for i in range(len(block) - 2, 1, -1):
1767 if self.grid_table_top_pat.match(block[i]):
1768 self.state_machine.previous_line(len(block) - i + 1)
1769 del block[i+1:]
1770 blank_finish = False
1771 break
1772 else:
1773 detail = 'Bottom border missing or corrupt.'
1774 messages.extend(self.malformed_table(block, detail, i))
1775 return [], messages, blank_finish
1776 for i in range(len(block)): # check right edge
1777 if len(block[i]) != width or block[i][-1] not in '+|':
1778 detail = 'Right border not aligned or missing.'
1779 messages.extend(self.malformed_table(block, detail, i))
1780 return [], messages, blank_finish
1781 return block, messages, blank_finish
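# Illustrative note: a well-formed grid table block looks like
#
#     +------------+------------+
#     | Cell 1     | Cell 2     |
#     +------------+------------+
#
# The checks above require every line to start with "+" or "|" (left edge),
# to match the top border's width with "+" or "|" at the right edge, and the
# last line to match the top-border pattern; otherwise the block is truncated
# or reported via `malformed_table`.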
1782
1783 def isolate_simple_table(self):
1784 start = self.state_machine.line_offset
1785 lines = self.state_machine.input_lines
1786 limit = len(lines) - 1
1787 toplen = len(lines[start].strip())
1788 pattern_match = self.simple_table_border_pat.match
1789 found = 0
1790 found_at = None
1791 i = start + 1
1792 while i <= limit:
1793 line = lines[i]
1794 match = pattern_match(line)
1795 if match:
1796 if len(line.strip()) != toplen:
1797 self.state_machine.next_line(i - start)
1798 messages = self.malformed_table(
1799 lines[start:i+1], 'Bottom border or header rule does '
1800 'not match top border.', i-start)
1801 return [], messages, i == limit or not lines[i+1].strip()
1802 found += 1
1803 found_at = i
1804 if found == 2 or i == limit or not lines[i+1].strip():
1805 end = i
1806 break
1807 i += 1
1808 else: # reached end of input_lines
1809 details = 'No bottom table border found'
1810 if found:
1811 details += ' or no blank line after table bottom'
1812 self.state_machine.next_line(found_at - start)
1813 block = lines[start:found_at+1]
1814 else:
1815 self.state_machine.next_line(i - start - 1)
1816 block = lines[start:]
1817 messages = self.malformed_table(block, details + '.')
1818 return [], messages, not found
1819 self.state_machine.next_line(end - start)
1820 block = lines[start:end+1]
1821 # for East Asian chars:
1822 block.pad_double_width(self.double_width_pad_char)
1823 return block, [], end == limit or not lines[end+1].strip()
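# Illustrative note: a simple table is delimited by border lines of "="
# columns that must all have the top border's width, e.g.
#
#     =====  =====
#     A      B
#     =====  =====      (header/body separator)
#     a      b
#     =====  =====      (bottom border, followed by a blank line)
#
# The loop above stops after the second border below the top, at a border
# followed by a blank line, or at the end of input; a missing bottom border
# is reported via `malformed_table`.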
1824
1825 def malformed_table(self, block, detail='', offset=0):
1826 block.replace(self.double_width_pad_char, '')
1827 data = '\n'.join(block)
1828 message = 'Malformed table.'
1829 startline = self.state_machine.abs_line_number() - len(block) + 1
1830 if detail:
1831 message += '\n' + detail
1832 error = self.reporter.error(message, nodes.literal_block(data, data),
1833 line=startline+offset)
1834 return [error]
1835
1836 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
1837 colwidths, headrows, bodyrows = tabledata
1838 table = nodes.table()
1839 if widths == 'auto':
1840 table['classes'] += ['colwidths-auto']
1841 elif widths: # "grid" or list of integers
1842 table['classes'] += ['colwidths-given']
1843 tgroup = nodes.tgroup(cols=len(colwidths))
1844 table += tgroup
1845 for colwidth in colwidths:
1846 colspec = nodes.colspec(colwidth=colwidth)
1847 if stub_columns:
1848 colspec.attributes['stub'] = True
1849 stub_columns -= 1
1850 tgroup += colspec
1851 if headrows:
1852 thead = nodes.thead()
1853 tgroup += thead
1854 for row in headrows:
1855 thead += self.build_table_row(row, tableline)
1856 tbody = nodes.tbody()
1857 tgroup += tbody
1858 for row in bodyrows:
1859 tbody += self.build_table_row(row, tableline)
1860 return table
1861
1862 def build_table_row(self, rowdata, tableline):
1863 row = nodes.row()
1864 for cell in rowdata:
1865 if cell is None:
1866 continue
1867 morerows, morecols, offset, cellblock = cell
1868 attributes = {}
1869 if morerows:
1870 attributes['morerows'] = morerows
1871 if morecols:
1872 attributes['morecols'] = morecols
1873 entry = nodes.entry(**attributes)
1874 row += entry
1875 if ''.join(cellblock):
1876 self.nested_parse(cellblock, input_offset=tableline+offset,
1877 node=entry)
1878 return row
1879
1880 explicit = Struct()
1881 """Patterns and constants used for explicit markup recognition."""
1882
1883 explicit.patterns = Struct(
1884 target=re.compile(r"""
1885 (
1886 _ # anonymous target
1887 | # *OR*
1888 (?!_) # no underscore at the beginning
1889 (?P<quote>`?) # optional open quote
1890 (?![ `]) # first char. not space or
1891 # backquote
1892 (?P<name> # reference name
1893 .+?
1894 )
1895 %(non_whitespace_escape_before)s
1896 (?P=quote) # close quote if open quote used
1897 )
1898 (?<!(?<!\x00):) # no unescaped colon at end
1899 %(non_whitespace_escape_before)s
1900 [ ]? # optional space
1901 : # end of reference name
1902 ([ ]+|$) # followed by whitespace
1903 """ % vars(Inliner), re.VERBOSE),
1904 reference=re.compile(r"""
1905 (
1906 (?P<simple>%(simplename)s)_
1907 | # *OR*
1908 ` # open backquote
1909 (?![ ]) # not space
1910 (?P<phrase>.+?) # hyperlink phrase
1911 %(non_whitespace_escape_before)s
1912 `_ # close backquote,
1913 # reference mark
1914 )
1915 $ # end of string
1916 """ % vars(Inliner), re.VERBOSE),
1917 substitution=re.compile(r"""
1918 (
1919 (?![ ]) # first char. not space
1920 (?P<name>.+?) # substitution text
1921 %(non_whitespace_escape_before)s
1922 \| # close delimiter
1923 )
1924 ([ ]+|$) # followed by whitespace
1925 """ % vars(Inliner),
1926 re.VERBOSE),)
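# Examples of strings matched by the patterns above (standard reStructuredText
# explicit-markup syntax):
#
#     target:        "name: URI", "`a phrase`: URI", "_: URI" (anonymous)
#     reference:     "name_", "`hyperlink phrase`_"
#     substitution:  "text|" followed by whitespace -- the part of a
#                    substitution definition up to its closing "|"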
1927
1928 def footnote(self, match):
1929 src, srcline = self.state_machine.get_source_and_line()
1930 (indented, indent, offset, blank_finish
1931 ) = self.state_machine.get_first_known_indented(match.end())
1932 label = match.group(1)
1933 name = normalize_name(label)
1934 footnote = nodes.footnote('\n'.join(indented))
1935 footnote.source = src
1936 footnote.line = srcline
1937 if name[0] == '#': # auto-numbered
1938 name = name[1:] # autonumber label
1939 footnote['auto'] = 1
1940 if name:
1941 footnote['names'].append(name)
1942 self.document.note_autofootnote(footnote)
1943 elif name == '*': # auto-symbol
1944 name = ''
1945 footnote['auto'] = '*'
1946 self.document.note_symbol_footnote(footnote)
1947 else: # manually numbered
1948 footnote += nodes.label('', label)
1949 footnote['names'].append(name)
1950 self.document.note_footnote(footnote)
1951 if name:
1952 self.document.note_explicit_target(footnote, footnote)
1953 else:
1954 self.document.set_id(footnote, footnote)
1955 if indented:
1956 self.nested_parse(indented, input_offset=offset, node=footnote)
1957 else:
1958 footnote += self.reporter.warning('Footnote content expected.')
1959 return [footnote], blank_finish
1960
1961 def citation(self, match):
1962 src, srcline = self.state_machine.get_source_and_line()
1963 (indented, indent, offset, blank_finish
1964 ) = self.state_machine.get_first_known_indented(match.end())
1965 label = match.group(1)
1966 name = normalize_name(label)
1967 citation = nodes.citation('\n'.join(indented))
1968 citation.source = src
1969 citation.line = srcline
1970 citation += nodes.label('', label)
1971 citation['names'].append(name)
1972 self.document.note_citation(citation)
1973 self.document.note_explicit_target(citation, citation)
1974 if indented:
1975 self.nested_parse(indented, input_offset=offset, node=citation)
1976 else:
1977 citation += self.reporter.warning('Citation content expected.')
1978 return [citation], blank_finish
1979
1980 def hyperlink_target(self, match):
1981 pattern = self.explicit.patterns.target
1982 lineno = self.state_machine.abs_line_number()
1983 (block, indent, offset, blank_finish
1984 ) = self.state_machine.get_first_known_indented(
1985 match.end(), until_blank=True, strip_indent=False)
1986 blocktext = match.string[:match.end()] + '\n'.join(block)
1987 block = [escape2null(line) for line in block]
1988 escaped = block[0]
1989 blockindex = 0
1990 while True:
1991 targetmatch = pattern.match(escaped)
1992 if targetmatch:
1993 break
1994 blockindex += 1
1995 try:
1996 escaped += block[blockindex]
1997 except IndexError:
1998 raise MarkupError('malformed hyperlink target.')
1999 del block[:blockindex]
2000 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
2001 target = self.make_target(block, blocktext, lineno,
2002 targetmatch.group('name'))
2003 return [target], blank_finish
2004
2005 def make_target(self, block, block_text, lineno, target_name):
2006 target_type, data = self.parse_target(block, block_text, lineno)
2007 if target_type == 'refname':
2008 target = nodes.target(block_text, '', refname=normalize_name(data))
2009 target.indirect_reference_name = data
2010 self.add_target(target_name, '', target, lineno)
2011 self.document.note_indirect_target(target)
2012 return target
2013 elif target_type == 'refuri':
2014 target = nodes.target(block_text, '')
2015 self.add_target(target_name, data, target, lineno)
2016 return target
2017 else:
2018 return data
2019
2020 def parse_target(self, block, block_text, lineno):
2021 """
2022 Determine the type of reference of a target.
2023
2024 :Return: A 2-tuple, one of:
2025
2026 - 'refname' and the indirect reference name
2027 - 'refuri' and the URI
2028 - 'malformed' and a system_message node
2029 """
2030 if block and block[-1].strip()[-1:] == '_': # possible indirect target
2031 reference = ' '.join(line.strip() for line in block)
2032 refname = self.is_reference(reference)
2033 if refname:
2034 return 'refname', refname
2035 ref_parts = split_escaped_whitespace(' '.join(block))
2036 reference = ' '.join(''.join(unescape(part).split())
2037 for part in ref_parts)
2038 return 'refuri', reference
2039
2040 def is_reference(self, reference):
2041 match = self.explicit.patterns.reference.match(
2042 whitespace_normalize_name(reference))
2043 if not match:
2044 return None
2045 return unescape(match.group('simple') or match.group('phrase'))
2046
2047 def add_target(self, targetname, refuri, target, lineno):
2048 target.line = lineno
2049 if targetname:
2050 name = normalize_name(unescape(targetname))
2051 target['names'].append(name)
2052 if refuri:
2053 uri = self.inliner.adjust_uri(refuri)
2054 if uri:
2055 target['refuri'] = uri
2056 else:
2057 raise ApplicationError('problem with URI: %r' % refuri)
2058 self.document.note_explicit_target(target, self.parent)
2059 else: # anonymous target
2060 if refuri:
2061 target['refuri'] = refuri
2062 target['anonymous'] = True
2063 self.document.note_anonymous_target(target)
2064
2065 def substitution_def(self, match):
2066 pattern = self.explicit.patterns.substitution
2067 src, srcline = self.state_machine.get_source_and_line()
2068 (block, indent, offset, blank_finish
2069 ) = self.state_machine.get_first_known_indented(match.end(),
2070 strip_indent=False)
2071 blocktext = (match.string[:match.end()] + '\n'.join(block))
2072 block.disconnect()
2073 escaped = escape2null(block[0].rstrip())
2074 blockindex = 0
2075 while True:
2076 subdefmatch = pattern.match(escaped)
2077 if subdefmatch:
2078 break
2079 blockindex += 1
2080 try:
2081 escaped = escaped + ' ' + escape2null(
2082 block[blockindex].strip())
2083 except IndexError:
2084 raise MarkupError('malformed substitution definition.')
2085 del block[:blockindex] # strip out the substitution marker
2086 start = subdefmatch.end()-len(escaped)-1
2087 block[0] = (block[0].strip() + ' ')[start:-1]
2088 if not block[0]:
2089 del block[0]
2090 offset += 1
2091 while block and not block[-1].strip():
2092 block.pop()
2093 subname = subdefmatch.group('name')
2094 substitution_node = nodes.substitution_definition(blocktext)
2095 substitution_node.source = src
2096 substitution_node.line = srcline
2097 if not block:
2098 msg = self.reporter.warning(
2099 'Substitution definition "%s" missing contents.' % subname,
2100 nodes.literal_block(blocktext, blocktext),
2101 source=src, line=srcline)
2102 return [msg], blank_finish
2103 block[0] = block[0].strip()
2104 substitution_node['names'].append(
2105 nodes.whitespace_normalize_name(subname))
2106 new_abs_offset, blank_finish = self.nested_list_parse(
2107 block, input_offset=offset, node=substitution_node,
2108 initial_state='SubstitutionDef', blank_finish=blank_finish)
2109 i = 0
2110 for node in substitution_node[:]:
2111 if not (isinstance(node, nodes.Inline)
2112 or isinstance(node, nodes.Text)):
2113 self.parent += substitution_node[i]
2114 del substitution_node[i]
2115 else:
2116 i += 1
2117 for node in substitution_node.findall(nodes.Element):
2118 if self.disallowed_inside_substitution_definitions(node):
2119 pformat = nodes.literal_block('', node.pformat().rstrip())
2120 msg = self.reporter.error(
2121 'Substitution definition contains illegal element <%s>:'
2122 % node.tagname,
2123 pformat, nodes.literal_block(blocktext, blocktext),
2124 source=src, line=srcline)
2125 return [msg], blank_finish
2126 if len(substitution_node) == 0:
2127 msg = self.reporter.warning(
2128 'Substitution definition "%s" empty or invalid.' % subname,
2129 nodes.literal_block(blocktext, blocktext),
2130 source=src, line=srcline)
2131 return [msg], blank_finish
2132 self.document.note_substitution_def(
2133 substitution_node, subname, self.parent)
2134 return [substitution_node], blank_finish
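# Illustrative example: the substitution definition
#
#     .. |EXAMPLE| image:: example.png
#
# is handled here by matching the "|EXAMPLE|" marker, registering the name
# "EXAMPLE", and nested-parsing the remainder in the 'SubstitutionDef' state,
# so the embedded ``image`` directive supplies the substitution's
# (inline-only) content.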
2135
2136 def disallowed_inside_substitution_definitions(self, node) -> bool:
2137 if (node['ids']
2138 or isinstance(node, nodes.reference) and node.get('anonymous')
2139 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2140 return True
2141 else:
2142 return False
2143
2144 def directive(self, match, **option_presets):
2145 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2146 type_name = match.group(1)
2147 directive_class, messages = directives.directive(
2148 type_name, self.memo.language, self.document)
2149 self.parent += messages
2150 if directive_class:
2151 return self.run_directive(
2152 directive_class, match, type_name, option_presets)
2153 else:
2154 return self.unknown_directive(type_name)
2155
2156 def run_directive(self, directive, match, type_name, option_presets):
2157 """
2158 Parse a directive then run its directive function.
2159
2160 Parameters:
2161
2162 - `directive`: The class implementing the directive. Must be
2163 a subclass of `rst.Directive`.
2164
2165 - `match`: A regular expression match object which matched the first
2166 line of the directive.
2167
2168 - `type_name`: The directive name, as used in the source text.
2169
2170 - `option_presets`: A dictionary of preset options, defaults for the
2171 directive options. Currently, only an "alt" option is passed by
2172 substitution definitions (value: the substitution name), which may
2173 be used by an embedded image directive.
2174
2175 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2176 """
2177 if isinstance(directive, (FunctionType, MethodType)):
2178 from docutils.parsers.rst import convert_directive_function
2179 directive = convert_directive_function(directive)
2180 lineno = self.state_machine.abs_line_number()
2181 initial_line_offset = self.state_machine.line_offset
2182 (indented, indent, line_offset, blank_finish
2183 ) = self.state_machine.get_first_known_indented(match.end(),
2184 strip_top=0)
2185 block_text = '\n'.join(self.state_machine.input_lines[
2186 initial_line_offset : self.state_machine.line_offset + 1]) # noqa: E203,E501
2187 try:
2188 arguments, options, content, content_offset = (
2189 self.parse_directive_block(indented, line_offset,
2190 directive, option_presets))
2191 except MarkupError as detail:
2192 error = self.reporter.error(
2193 'Error in "%s" directive:\n%s.' % (type_name,
2194 ' '.join(detail.args)),
2195 nodes.literal_block(block_text, block_text), line=lineno)
2196 return [error], blank_finish
2197 directive_instance = directive(
2198 type_name, arguments, options, content, lineno,
2199 content_offset, block_text, self, self.state_machine)
2200 try:
2201 result = directive_instance.run()
2202 except docutils.parsers.rst.DirectiveError as error:
2203 msg_node = self.reporter.system_message(error.level, error.msg,
2204 line=lineno)
2205 msg_node += nodes.literal_block(block_text, block_text)
2206 result = [msg_node]
2207 assert isinstance(result, list), \
2208 'Directive "%s" must return a list of nodes.' % type_name
2209 for i in range(len(result)):
2210 assert isinstance(result[i], nodes.Node), \
2211 ('Directive "%s" returned non-Node object (index %s): %r'
2212 % (type_name, i, result[i]))
2213 return (result,
2214 blank_finish or self.state_machine.is_next_line_blank())
2215
2216 def parse_directive_block(self, indented, line_offset, directive,
2217 option_presets):
2218 option_spec = directive.option_spec
2219 has_content = directive.has_content
2220 if indented and not indented[0].strip():
2221 indented.trim_start()
2222 line_offset += 1
2223 while indented and not indented[-1].strip():
2224 indented.trim_end()
2225 if indented and (directive.required_arguments
2226 or directive.optional_arguments
2227 or option_spec):
2228 for i, line in enumerate(indented):
2229 if not line.strip():
2230 break
2231 else:
2232 i += 1
2233 arg_block = indented[:i]
2234 content = indented[i+1:]
2235 content_offset = line_offset + i + 1
2236 else:
2237 content = indented
2238 content_offset = line_offset
2239 arg_block = []
2240 if option_spec:
2241 options, arg_block = self.parse_directive_options(
2242 option_presets, option_spec, arg_block)
2243 else:
2244 options = {}
2245 if arg_block and not (directive.required_arguments
2246 or directive.optional_arguments):
2247 content = arg_block + indented[i:]
2248 content_offset = line_offset
2249 arg_block = []
2250 while content and not content[0].strip():
2251 content.trim_start()
2252 content_offset += 1
2253 if directive.required_arguments or directive.optional_arguments:
2254 arguments = self.parse_directive_arguments(
2255 directive, arg_block)
2256 else:
2257 arguments = []
2258 if content and not has_content:
2259 raise MarkupError('no content permitted')
2260 return arguments, options, content, content_offset
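# Worked example of the split performed above.  For a directive block such as
#
#     .. figure:: picture.png
#        :width: 200px
#
#        The caption paragraph.
#
# the indented block is divided at its first blank line: "picture.png" and
# ":width: 200px" form the argument/option block, while "The caption
# paragraph." becomes the content, with `content_offset` pointing at its
# first line.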
2261
2262 def parse_directive_options(self, option_presets, option_spec, arg_block):
2263 options = option_presets.copy()
2264 for i, line in enumerate(arg_block):
2265 if re.match(Body.patterns['field_marker'], line):
2266 opt_block = arg_block[i:]
2267 arg_block = arg_block[:i]
2268 break
2269 else:
2270 opt_block = []
2271 if opt_block:
2272 success, data = self.parse_extension_options(option_spec,
2273 opt_block)
2274 if success: # data is a dict of options
2275 options.update(data)
2276 else: # data is an error string
2277 raise MarkupError(data)
2278 return options, arg_block
2279
2280 def parse_directive_arguments(self, directive, arg_block):
2281 required = directive.required_arguments
2282 optional = directive.optional_arguments
2283 arg_text = '\n'.join(arg_block)
2284 arguments = arg_text.split()
2285 if len(arguments) < required:
2286 raise MarkupError('%s argument(s) required, %s supplied'
2287 % (required, len(arguments)))
2288 elif len(arguments) > required + optional:
2289 if directive.final_argument_whitespace:
2290 arguments = arg_text.split(None, required + optional - 1)
2291 else:
2292 raise MarkupError(
2293 'maximum %s argument(s) allowed, %s supplied'
2294 % (required + optional, len(arguments)))
2295 return arguments
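# Example: with required_arguments=1, optional_arguments=0 and
# final_argument_whitespace=True (typical for title-like arguments), an
# argument block of
#
#     A title that
#     spans two lines
#
# initially splits into several whitespace-separated tokens, exceeds the one
# allowed argument, and is therefore re-split with ``arg_text.split(None, 0)``
# so that the whole text becomes the single argument.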
2296
2297 def parse_extension_options(self, option_spec, datalines):
2298 """
2299 Parse `datalines` for a field list containing extension options
2300 matching `option_spec`.
2301
2302 :Parameters:
2303 - `option_spec`: a mapping of option name to conversion
2304 function, which should raise an exception on bad input.
2305 - `datalines`: a list of input strings.
2306
2307 :Return:
2308 - Success value, 1 or 0.
2309 - An option dictionary on success, an error string on failure.
2310 """
2311 node = nodes.field_list()
2312 newline_offset, blank_finish = self.nested_list_parse(
2313 datalines, 0, node, initial_state='ExtensionOptions',
2314 blank_finish=True)
2315 if newline_offset != len(datalines): # incomplete parse of block
2316 return 0, 'invalid option block'
2317 try:
2318 options = utils.extract_extension_options(node, option_spec)
2319 except KeyError as detail:
2320 return 0, 'unknown option: "%s"' % detail.args[0]
2321 except (ValueError, TypeError) as detail:
2322 return 0, 'invalid option value: %s' % ' '.join(detail.args)
2323 except utils.ExtensionOptionError as detail:
2324 return 0, 'invalid option data: %s' % ' '.join(detail.args)
2325 if blank_finish:
2326 return 1, options
2327 else:
2328 return 0, 'option data incompletely parsed'
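# Usage sketch with hypothetical values: given
#
#     option_spec = {'width': directives.positive_int}
#     datalines   = [':width: 200']
#
# a successful parse returns ``(1, {'width': 200})``; an unknown field name or
# a failed conversion returns ``(0, <error string>)`` instead.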
2329
2330 def unknown_directive(self, type_name):
2331 lineno = self.state_machine.abs_line_number()
2332 (indented, indent, offset, blank_finish
2333 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2334 text = '\n'.join(indented)
2335 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2336 nodes.literal_block(text, text),
2337 line=lineno)
2338 return [error], blank_finish
2339
2340 def comment(self, match):
2341 if self.state_machine.is_next_line_blank():
2342 first_comment_line = match.string[match.end():]
2343 if not first_comment_line.strip(): # empty comment
2344 return [nodes.comment()], True # "A tiny but practical wart."
2345 if first_comment_line.startswith('end of inclusion from "'):
2346 # cf. parsers.rst.directives.misc.Include
2347 self.document.include_log.pop()
2348 return [], True
2349 (indented, indent, offset, blank_finish
2350 ) = self.state_machine.get_first_known_indented(match.end())
2351 while indented and not indented[-1].strip():
2352 indented.trim_end()
2353 text = '\n'.join(indented)
2354 return [nodes.comment(text, text)], blank_finish
2355
2356 explicit.constructs = [
2357 (footnote,
2358 re.compile(r"""
2359 \.\.[ ]+ # explicit markup start
2360 \[
2361 ( # footnote label:
2362 [0-9]+ # manually numbered footnote
2363 | # *OR*
2364 \# # anonymous auto-numbered footnote
2365 | # *OR*
2366 \#%s # auto-numbered footnote with label
2367 | # *OR*
2368 \* # auto-symbol footnote
2369 )
2370 \]
2371 ([ ]+|$) # whitespace or end of line
2372 """ % Inliner.simplename, re.VERBOSE)),
2373 (citation,
2374 re.compile(r"""
2375 \.\.[ ]+ # explicit markup start
2376 \[(%s)\] # citation label
2377 ([ ]+|$) # whitespace or end of line
2378 """ % Inliner.simplename, re.VERBOSE)),
2379 (hyperlink_target,
2380 re.compile(r"""
2381 \.\.[ ]+ # explicit markup start
2382 _ # target indicator
2383 (?![ ]|$) # first char. not space or EOL
2384 """, re.VERBOSE)),
2385 (substitution_def,
2386 re.compile(r"""
2387 \.\.[ ]+ # explicit markup start
2388 \| # substitution indicator
2389 (?![ ]|$) # first char. not space or EOL
2390 """, re.VERBOSE)),
2391 (directive,
2392 re.compile(r"""
2393 \.\.[ ]+ # explicit markup start
2394 (%s) # directive name
2395 [ ]? # optional space
2396 :: # directive delimiter
2397 ([ ]+|$) # whitespace or end of line
2398 """ % Inliner.simplename, re.VERBOSE))]
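# Summary of the explicit markup forms recognized by the constructs above:
#
#     .. [1]           manually numbered footnote
#     .. [#]           auto-numbered footnote
#     .. [#label]      auto-numbered footnote with label
#     .. [*]           auto-symbol footnote
#     .. [CIT2002]     citation
#     .. _target:      hyperlink target
#     .. |symbol|      substitution definition
#     .. name::        directive
#
# Anything else beginning with ".." falls back to `comment` (see
# `explicit_construct` below).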
2399
2400 def explicit_markup(self, match, context, next_state):
2401 """Footnotes, hyperlink targets, directives, comments."""
2402 nodelist, blank_finish = self.explicit_construct(match)
2403 self.parent += nodelist
2404 self.explicit_list(blank_finish)
2405 return [], next_state, []
2406
2407 def explicit_construct(self, match):
2408 """Determine which explicit construct this is, parse & return it."""
2409 errors = []
2410 for method, pattern in self.explicit.constructs:
2411 expmatch = pattern.match(match.string)
2412 if expmatch:
2413 try:
2414 return method(self, expmatch)
2415 except MarkupError as error:
2416 lineno = self.state_machine.abs_line_number()
2417 message = ' '.join(error.args)
2418 errors.append(self.reporter.warning(message, line=lineno))
2419 break
2420 nodelist, blank_finish = self.comment(match)
2421 return nodelist + errors, blank_finish
2422
2423 def explicit_list(self, blank_finish) -> None:
2424 """
2425 Create a nested state machine for a series of explicit markup
2426 constructs (including anonymous hyperlink targets).
2427 """
2428 offset = self.state_machine.line_offset + 1 # next line
2429 newline_offset, blank_finish = self.nested_list_parse(
2430 self.state_machine.input_lines[offset:],
2431 input_offset=self.state_machine.abs_line_offset() + 1,
2432 node=self.parent, initial_state='Explicit',
2433 blank_finish=blank_finish,
2434 match_titles=self.state_machine.match_titles)
2435 self.goto_line(newline_offset)
2436 if not blank_finish:
2437 self.parent += self.unindent_warning('Explicit markup')
2438
2439 def anonymous(self, match, context, next_state):
2440 """Anonymous hyperlink targets."""
2441 nodelist, blank_finish = self.anonymous_target(match)
2442 self.parent += nodelist
2443 self.explicit_list(blank_finish)
2444 return [], next_state, []
2445
2446 def anonymous_target(self, match):
2447 lineno = self.state_machine.abs_line_number()
2448 (block, indent, offset, blank_finish
2449 ) = self.state_machine.get_first_known_indented(match.end(),
2450 until_blank=True)
2451 blocktext = match.string[:match.end()] + '\n'.join(block)
2452 block = [escape2null(line) for line in block]
2453 target = self.make_target(block, blocktext, lineno, '')
2454 return [target], blank_finish
2455
2456 def line(self, match, context, next_state):
2457 """Section title overline or transition marker."""
2458 if self.state_machine.match_titles:
2459 return [match.string], 'Line', []
2460 elif match.string.strip() == '::':
2461 raise statemachine.TransitionCorrection('text')
2462 elif len(match.string.strip()) < 4:
2463 msg = self.reporter.info(
2464 'Unexpected possible title overline or transition.\n'
2465 "Treating it as ordinary text because it's so short.",
2466 line=self.state_machine.abs_line_number())
2467 self.parent += msg
2468 raise statemachine.TransitionCorrection('text')
2469 else:
2470 blocktext = self.state_machine.line
2471 msg = self.reporter.error(
2472 'Unexpected section title or transition.',
2473 nodes.literal_block(blocktext, blocktext),
2474 line=self.state_machine.abs_line_number())
2475 self.parent += msg
2476 return [], next_state, []
2477
2478 def text(self, match, context, next_state):
2479 """Titles, definition lists, paragraphs."""
2480 return [match.string], 'Text', []
2481
2482
2483class RFC2822Body(Body):
2484
2485 """
2486 RFC2822 headers are only valid as the first constructs in documents. As
2487 soon as anything else appears, the `Body` state should take over.
2488 """
2489
2490 patterns = Body.patterns.copy() # can't modify the original
2491 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2492 initial_transitions = [(name, 'Body')
2493 for name in Body.initial_transitions]
2494 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2495
2496 def rfc2822(self, match, context, next_state):
2497 """RFC2822-style field list item."""
2498 fieldlist = nodes.field_list(classes=['rfc2822'])
2499 self.parent += fieldlist
2500 field, blank_finish = self.rfc2822_field(match)
2501 fieldlist += field
2502 offset = self.state_machine.line_offset + 1 # next line
2503 newline_offset, blank_finish = self.nested_list_parse(
2504 self.state_machine.input_lines[offset:],
2505 input_offset=self.state_machine.abs_line_offset() + 1,
2506 node=fieldlist, initial_state='RFC2822List',
2507 blank_finish=blank_finish)
2508 self.goto_line(newline_offset)
2509 if not blank_finish:
2510 self.parent += self.unindent_warning(
2511 'RFC2822-style field list')
2512 return [], next_state, []
2513
2514 def rfc2822_field(self, match):
2515 name = match.string[:match.string.find(':')]
2516 (indented, indent, line_offset, blank_finish
2517 ) = self.state_machine.get_first_known_indented(match.end(),
2518 until_blank=True)
2519 fieldnode = nodes.field()
2520 fieldnode += nodes.field_name(name, name)
2521 fieldbody = nodes.field_body('\n'.join(indented))
2522 fieldnode += fieldbody
2523 if indented:
2524 self.nested_parse(indented, input_offset=line_offset,
2525 node=fieldbody)
2526 return fieldnode, blank_finish
2527
2528
2529class SpecializedBody(Body):
2530
2531 """
2532 Superclass for second and subsequent compound element members. Compound
2533 elements are lists and list-like constructs.
2534
2535 All transition methods are disabled (redefined as `invalid_input`).
2536 Override individual methods in subclasses to re-enable.
2537
2538 For example, once an initial bullet list item, say, is recognized, the
2539 `BulletList` subclass takes over, with a "bullet_list" node as its
2540 container. Upon encountering the initial bullet list item, `Body.bullet`
2541 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2542 starts up a nested parsing session with `BulletList` as the initial state.
2543 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2544 as only bullet list items are encountered, they are parsed and inserted
2545 into the container. The first construct which is *not* a bullet list item
2546 triggers the `invalid_input` method, which ends the nested parse and
2547 closes the container. `BulletList` needs to recognize input that is
2548 invalid in the context of a bullet list, which means everything *other
2549 than* bullet list items, so it inherits the transition list created in
2550 `Body`.
2551 """
2552
2553 def invalid_input(self, match=None, context=None, next_state=None):
2554 """Not a compound element member. Abort this state machine."""
2555 self.state_machine.previous_line() # back up so parent SM can reassess
2556 raise EOFError
2557
2558 indent = invalid_input
2559 bullet = invalid_input
2560 enumerator = invalid_input
2561 field_marker = invalid_input
2562 option_marker = invalid_input
2563 doctest = invalid_input
2564 line_block = invalid_input
2565 grid_table_top = invalid_input
2566 simple_table_top = invalid_input
2567 explicit_markup = invalid_input
2568 anonymous = invalid_input
2569 line = invalid_input
2570 text = invalid_input
2571
2572
2573class BulletList(SpecializedBody):
2574
2575 """Second and subsequent bullet_list list_items."""
2576
2577 def bullet(self, match, context, next_state):
2578 """Bullet list item."""
2579 if match.string[0] != self.parent['bullet']:
2580 # different bullet: new list
2581 self.invalid_input()
2582 listitem, blank_finish = self.list_item(match.end())
2583 self.parent += listitem
2584 self.blank_finish = blank_finish
2585 return [], next_state, []
2586
2587
2588class DefinitionList(SpecializedBody):
2589
2590 """Second and subsequent definition_list_items."""
2591
2592 def text(self, match, context, next_state):
2593 """Definition lists."""
2594 return [match.string], 'Definition', []
2595
2596
2597class EnumeratedList(SpecializedBody):
2598
2599 """Second and subsequent enumerated_list list_items."""
2600
2601 def enumerator(self, match, context, next_state):
2602 """Enumerated list item."""
2603 format, sequence, text, ordinal = self.parse_enumerator(
2604 match, self.parent['enumtype'])
2605 if (format != self.format
2606 or (sequence != '#' and (sequence != self.parent['enumtype']
2607 or self.auto
2608 or ordinal != (self.lastordinal + 1)))
2609 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2610 # different enumeration: new list
2611 self.invalid_input()
2612 if sequence == '#':
2613 self.auto = 1
2614 listitem, blank_finish = self.list_item(match.end())
2615 self.parent += listitem
2616 self.blank_finish = blank_finish
2617 self.lastordinal = ordinal
2618 return [], next_state, []
2619
2620
2621class FieldList(SpecializedBody):
2622
2623 """Second and subsequent field_list fields."""
2624
2625 def field_marker(self, match, context, next_state):
2626 """Field list field."""
2627 field, blank_finish = self.field(match)
2628 self.parent += field
2629 self.blank_finish = blank_finish
2630 return [], next_state, []
2631
2632
2633class OptionList(SpecializedBody):
2634
2635 """Second and subsequent option_list option_list_items."""
2636
2637 def option_marker(self, match, context, next_state):
2638 """Option list item."""
2639 try:
2640 option_list_item, blank_finish = self.option_list_item(match)
2641 except MarkupError:
2642 self.invalid_input()
2643 self.parent += option_list_item
2644 self.blank_finish = blank_finish
2645 return [], next_state, []
2646
2647
2648class RFC2822List(SpecializedBody, RFC2822Body):
2649
2650 """Second and subsequent RFC2822-style field_list fields."""
2651
2652 patterns = RFC2822Body.patterns
2653 initial_transitions = RFC2822Body.initial_transitions
2654
2655 def rfc2822(self, match, context, next_state):
2656 """RFC2822-style field list item."""
2657 field, blank_finish = self.rfc2822_field(match)
2658 self.parent += field
2659 self.blank_finish = blank_finish
2660 return [], 'RFC2822List', []
2661
2662 blank = SpecializedBody.invalid_input
2663
2664
2665class ExtensionOptions(FieldList):
2666
2667 """
2668 Parse field_list fields for extension options.
2669
2670 No nested parsing is done (including inline markup parsing).
2671 """
2672
2673 def parse_field_body(self, indented, offset, node) -> None:
2674 """Override `Body.parse_field_body` for simpler parsing."""
2675 lines = []
2676 for line in list(indented) + ['']:
2677 if line.strip():
2678 lines.append(line)
2679 elif lines:
2680 text = '\n'.join(lines)
2681 node += nodes.paragraph(text, text)
2682 lines = []
2683
2684
2685class LineBlock(SpecializedBody):
2686
2687 """Second and subsequent lines of a line_block."""
2688
2689 blank = SpecializedBody.invalid_input
2690
2691 def line_block(self, match, context, next_state):
2692 """New line of line block."""
2693 lineno = self.state_machine.abs_line_number()
2694 line, messages, blank_finish = self.line_block_line(match, lineno)
2695 self.parent += line
2696 self.parent.parent += messages
2697 self.blank_finish = blank_finish
2698 return [], next_state, []
2699
2700
2701class Explicit(SpecializedBody):
2702
2703 """Second and subsequent explicit markup constructs."""
2704
2705 def explicit_markup(self, match, context, next_state):
2706 """Footnotes, hyperlink targets, directives, comments."""
2707 nodelist, blank_finish = self.explicit_construct(match)
2708 self.parent += nodelist
2709 self.blank_finish = blank_finish
2710 return [], next_state, []
2711
2712 def anonymous(self, match, context, next_state):
2713 """Anonymous hyperlink targets."""
2714 nodelist, blank_finish = self.anonymous_target(match)
2715 self.parent += nodelist
2716 self.blank_finish = blank_finish
2717 return [], next_state, []
2718
2719 blank = SpecializedBody.invalid_input
2720
2721
2722class SubstitutionDef(Body):
2723
2724 """
2725 Parser for the contents of a substitution_definition element.
2726 """
2727
2728 patterns = {
2729 'embedded_directive': re.compile(r'(%s)::( +|$)'
2730 % Inliner.simplename),
2731 'text': r''}
2732 initial_transitions = ['embedded_directive', 'text']
2733
2734 def embedded_directive(self, match, context, next_state):
2735 nodelist, blank_finish = self.directive(match,
2736 alt=self.parent['names'][0])
2737 self.parent += nodelist
2738 if not self.state_machine.at_eof():
2739 self.blank_finish = blank_finish
2740 raise EOFError
2741
2742 def text(self, match, context, next_state):
2743 if not self.state_machine.at_eof():
2744 self.blank_finish = self.state_machine.is_next_line_blank()
2745 raise EOFError
2746
2747
2748class Text(RSTState):
2749
2750 """
2751 Classifier of second line of a text block.
2752
2753 Could be a paragraph, a definition list item, or a title.
2754 """
2755
2756 patterns = {'underline': Body.patterns['line'],
2757 'text': r''}
2758 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2759
2760 def blank(self, match, context, next_state):
2761 """End of paragraph."""
2762 # NOTE: self.paragraph returns [node, system_message(s)], literalnext
2763 paragraph, literalnext = self.paragraph(
2764 context, self.state_machine.abs_line_number() - 1)
2765 self.parent += paragraph
2766 if literalnext:
2767 self.parent += self.literal_block()
2768 return [], 'Body', []
2769
2770 def eof(self, context):
2771 if context:
2772 self.blank(None, context, None)
2773 return []
2774
2775 def indent(self, match, context, next_state):
2776 """Definition list item."""
2777 dl = nodes.definition_list()
2778 # the definition list starts on the line before the indent:
2779 lineno = self.state_machine.abs_line_number() - 1
2780 dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
2781 dl_item, blank_finish = self.definition_list_item(context)
2782 dl += dl_item
2783 self.parent += dl
2784 offset = self.state_machine.line_offset + 1 # next line
2785 newline_offset, blank_finish = self.nested_list_parse(
2786 self.state_machine.input_lines[offset:],
2787 input_offset=self.state_machine.abs_line_offset() + 1,
2788 node=dl, initial_state='DefinitionList',
2789 blank_finish=blank_finish, blank_finish_state='Definition')
2790 self.goto_line(newline_offset)
2791 if not blank_finish:
2792 self.parent += self.unindent_warning('Definition list')
2793 return [], 'Body', []
2794
2795 def underline(self, match, context, next_state):
2796 """Section title."""
2797 lineno = self.state_machine.abs_line_number()
2798 title = context[0].rstrip()
2799 underline = match.string.rstrip()
2800 source = title + '\n' + underline
2801 messages = []
2802 if column_width(title) > len(underline):
2803 if len(underline) < 4:
2804 if self.state_machine.match_titles:
2805 msg = self.reporter.info(
2806 'Possible title underline, too short for the title.\n'
2807 "Treating it as ordinary text because it's so short.",
2808 line=lineno)
2809 self.parent += msg
2810 raise statemachine.TransitionCorrection('text')
2811 else:
2812 blocktext = context[0] + '\n' + self.state_machine.line
2813 msg = self.reporter.warning(
2814 'Title underline too short.',
2815 nodes.literal_block(blocktext, blocktext),
2816 line=lineno)
2817 messages.append(msg)
2818 if not self.state_machine.match_titles:
2819 blocktext = context[0] + '\n' + self.state_machine.line
2820 # We need get_source_and_line() here to report correctly
2821 src, srcline = self.state_machine.get_source_and_line()
2822 # TODO: why is abs_line_number() == srcline+1
2823 # if the error is in a table (try with test_tables.py)?
2824 # print("get_source_and_line", srcline)
2825 # print("abs_line_number", self.state_machine.abs_line_number())
2826 msg = self.reporter.error(
2827 'Unexpected section title.',
2828 nodes.literal_block(blocktext, blocktext),
2829 source=src, line=srcline)
2830 self.parent += messages
2831 self.parent += msg
2832 return [], next_state, []
2833 style = underline[0]
2834 context[:] = []
2835 self.section(title, source, style, lineno - 1, messages)
2836 return [], next_state, []
2837
2838 def text(self, match, context, next_state):
2839 """Paragraph."""
2840 startline = self.state_machine.abs_line_number() - 1
2841 msg = None
2842 try:
2843 block = self.state_machine.get_text_block(flush_left=True)
2844 except statemachine.UnexpectedIndentationError as err:
2845 block, src, srcline = err.args
2846 msg = self.reporter.error('Unexpected indentation.',
2847 source=src, line=srcline)
2848 lines = context + list(block)
2849 paragraph, literalnext = self.paragraph(lines, startline)
2850 self.parent += paragraph
2851 self.parent += msg
2852 if literalnext:
2853 try:
2854 self.state_machine.next_line()
2855 except EOFError:
2856 pass
2857 self.parent += self.literal_block()
2858 return [], next_state, []
2859
2860 def literal_block(self):
2861 """Return a list of nodes."""
2862 (indented, indent, offset, blank_finish
2863 ) = self.state_machine.get_indented()
2864 while indented and not indented[-1].strip():
2865 indented.trim_end()
2866 if not indented:
2867 return self.quoted_literal_block()
2868 data = '\n'.join(indented)
2869 literal_block = nodes.literal_block(data, data)
2870 (literal_block.source,
2871 literal_block.line) = self.state_machine.get_source_and_line(offset+1)
2872 nodelist = [literal_block]
2873 if not blank_finish:
2874 nodelist.append(self.unindent_warning('Literal block'))
2875 return nodelist
2876
2877 def quoted_literal_block(self):
2878 abs_line_offset = self.state_machine.abs_line_offset()
2879 offset = self.state_machine.line_offset
2880 parent_node = nodes.Element()
2881 new_abs_offset = self.nested_parse(
2882 self.state_machine.input_lines[offset:],
2883 input_offset=abs_line_offset, node=parent_node, match_titles=False,
2884 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2885 'initial_state': 'QuotedLiteralBlock'})
2886 self.goto_line(new_abs_offset)
2887 return parent_node.children
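# Illustrative example: a quoted (unindented) literal block repeats one
# non-alphanumeric quoting character at the start of each line, e.g.
#
#     The following lines are quoted literally::
#
#     > line one
#     > line two
#
# The nested parse with `QuotedLiteralBlock` collects the consistently quoted
# lines into a single literal_block node.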
2888
2889 def definition_list_item(self, termline):
2890 # the parser is already on the second (indented) line:
2891 dd_lineno = self.state_machine.abs_line_number()
2892 dt_lineno = dd_lineno - 1
2893 (indented, indent, line_offset, blank_finish
2894 ) = self.state_machine.get_indented()
2895 dl_item = nodes.definition_list_item(
2896 '\n'.join(termline + list(indented)))
2897 (dl_item.source,
2898 dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
2899 dt_nodes, messages = self.term(termline, dt_lineno)
2900 dl_item += dt_nodes
2901 dd = nodes.definition('', *messages)
2902 dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
2903 dl_item += dd
2904 if termline[0][-2:] == '::':
2905 dd += self.reporter.info(
2906 'Blank line missing before literal block (after the "::")? '
2907 'Interpreted as a definition list item.',
2908 line=dd_lineno)
2909 # TODO: drop a definition if it is an empty comment to allow
2910 # definition list items with several terms?
2911 # https://sourceforge.net/p/docutils/feature-requests/60/
2912 self.nested_parse(indented, input_offset=line_offset, node=dd)
2913 return dl_item, blank_finish
2914
2915 classifier_delimiter = re.compile(' +: +')
2916
2917 def term(self, lines, lineno):
2918 """Return a definition_list's term and optional classifiers."""
2919 assert len(lines) == 1
2920 text_nodes, messages = self.inline_text(lines[0], lineno)
2921 dt = nodes.term(lines[0])
2922 dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
2923 node_list = [dt]
2924 for i in range(len(text_nodes)):
2925 node = text_nodes[i]
2926 if isinstance(node, nodes.Text):
2927 parts = self.classifier_delimiter.split(node)
2928 if len(parts) == 1:
2929 node_list[-1] += node
2930 else:
2931 text = parts[0].rstrip()
2932 textnode = nodes.Text(text)
2933 node_list[-1] += textnode
2934 node_list += [nodes.classifier(unescape(part, True), part)
2935 for part in parts[1:]]
2936 else:
2937 node_list[-1] += node
2938 return node_list, messages
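# Example: the term line
#
#     gravity : physics : classical
#
# produces a ``term`` node containing "gravity" followed by two ``classifier``
# nodes, "physics" and "classical", split on the " : " delimiter defined by
# `classifier_delimiter` above.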
2939
2940
2941class SpecializedText(Text):
2942
2943 """
2944 Superclass for second and subsequent lines of Text-variants.
2945
2946 All transition methods are disabled. Override individual methods in
2947 subclasses to re-enable.
2948 """
2949
2950 def eof(self, context):
2951 """Incomplete construct."""
2952 return []
2953
2954 def invalid_input(self, match=None, context=None, next_state=None):
2955 """Not a compound element member. Abort this state machine."""
2956 raise EOFError
2957
2958 blank = invalid_input
2959 indent = invalid_input
2960 underline = invalid_input
2961 text = invalid_input
2962
2963
2964class Definition(SpecializedText):
2965
2966 """Second line of potential definition_list_item."""
2967
2968 def eof(self, context):
2969 """Not a definition."""
2970 self.state_machine.previous_line(2) # so parent SM can reassess
2971 return []
2972
2973 def indent(self, match, context, next_state):
2974 """Definition list item."""
2975 dl_item, blank_finish = self.definition_list_item(context)
2976 self.parent += dl_item
2977 self.blank_finish = blank_finish
2978 return [], 'DefinitionList', []
2979
2980
2981class Line(SpecializedText):
2982
2983 """
2984 Second line of over- & underlined section title or transition marker.
2985 """
2986
2987 eofcheck = 1 # ignored, will be removed in Docutils 2.0.
2988
2989 def eof(self, context):
2990 """Transition marker at end of section or document."""
2991 marker = context[0].strip()
2992 if len(marker) < 4:
2993 self.state_correction(context)
2994 src, srcline = self.state_machine.get_source_and_line()
2995 # lineno = self.state_machine.abs_line_number() - 1
2996 transition = nodes.transition(rawsource=context[0])
2997 transition.source = src
2998 transition.line = srcline - 1
2999 # transition.line = lineno
3000 self.parent += transition
3001 return []
3002
3003 def blank(self, match, context, next_state):
3004 """Transition marker."""
3005 src, srcline = self.state_machine.get_source_and_line()
3006 marker = context[0].strip()
3007 if len(marker) < 4:
3008 self.state_correction(context)
3009 transition = nodes.transition(rawsource=marker)
3010 transition.source = src
3011 transition.line = srcline - 1
3012 self.parent += transition
3013 return [], 'Body', []
3014
3015 def text(self, match, context, next_state):
3016 """Potential over- & underlined title."""
3017 lineno = self.state_machine.abs_line_number() - 1
3018 overline = context[0]
3019 title = match.string
3020 underline = ''
3021 try:
3022 underline = self.state_machine.next_line()
3023 except EOFError:
3024 blocktext = overline + '\n' + title
3025 if len(overline.rstrip()) < 4:
3026 self.short_overline(context, blocktext, lineno, 2)
3027 else:
3028 msg = self.reporter.error(
3029 'Incomplete section title.',
3030 nodes.literal_block(blocktext, blocktext),
3031 line=lineno)
3032 self.parent += msg
3033 return [], 'Body', []
3034 source = '%s\n%s\n%s' % (overline, title, underline)
3035 overline = overline.rstrip()
3036 underline = underline.rstrip()
3037 if not self.transitions['underline'][0].match(underline):
3038 blocktext = overline + '\n' + title + '\n' + underline
3039 if len(overline.rstrip()) < 4:
3040 self.short_overline(context, blocktext, lineno, 2)
3041 else:
3042 msg = self.reporter.error(
3043 'Missing matching underline for section title overline.',
3044 nodes.literal_block(source, source),
3045 line=lineno)
3046 self.parent += msg
3047 return [], 'Body', []
3048 elif overline != underline:
3049 blocktext = overline + '\n' + title + '\n' + underline
3050 if len(overline.rstrip()) < 4:
3051 self.short_overline(context, blocktext, lineno, 2)
3052 else:
3053 msg = self.reporter.error(
3054 'Title overline & underline mismatch.',
3055 nodes.literal_block(source, source),
3056 line=lineno)
3057 self.parent += msg
3058 return [], 'Body', []
3059 title = title.rstrip()
3060 messages = []
3061 if column_width(title) > len(overline):
3062 blocktext = overline + '\n' + title + '\n' + underline
3063 if len(overline.rstrip()) < 4:
3064 self.short_overline(context, blocktext, lineno, 2)
3065 else:
3066 msg = self.reporter.warning(
3067 'Title overline too short.',
3068 nodes.literal_block(source, source),
3069 line=lineno)
3070 messages.append(msg)
3071 style = (overline[0], underline[0])
3072 self.section(title.lstrip(), source, style, lineno + 1, messages)
3073 return [], 'Body', []
3074
3075 indent = text # indented title
3076
3077 def underline(self, match, context, next_state):
3078 overline = context[0]
3079 blocktext = overline + '\n' + self.state_machine.line
3080 lineno = self.state_machine.abs_line_number() - 1
3081 if len(overline.rstrip()) < 4:
3082 self.short_overline(context, blocktext, lineno, 1)
3083 msg = self.reporter.error(
3084 'Invalid section title or transition marker.',
3085 nodes.literal_block(blocktext, blocktext),
3086 line=lineno)
3087 self.parent += msg
3088 return [], 'Body', []
3089
3090 def short_overline(self, context, blocktext, lineno, lines=1) -> None:
3091 msg = self.reporter.info(
3092 'Possible incomplete section title.\nTreating the overline as '
3093 "ordinary text because it's so short.",
3094 line=lineno)
3095 self.parent += msg
3096 self.state_correction(context, lines)
3097
3098 def state_correction(self, context, lines=1):
3099 self.state_machine.previous_line(lines)
3100 context[:] = []
3101 raise statemachine.StateCorrection('Body', 'text')
3102
3103
3104class QuotedLiteralBlock(RSTState):
3105
3106 """
3107 Nested parse handler for quoted (unindented) literal blocks.
3108
3109 Special-purpose. Not for inclusion in `state_classes`.
3110 """
3111
3112 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
3113 'text': r''}
3114 initial_transitions = ('initial_quoted', 'text')
3115
3116 def __init__(self, state_machine, debug=False) -> None:
3117 RSTState.__init__(self, state_machine, debug)
3118 self.messages = []
3119 self.initial_lineno = None
3120
3121 def blank(self, match, context, next_state):
3122 if context:
3123 raise EOFError
3124 else:
3125 return context, next_state, []
3126
3127 def eof(self, context):
3128 if context:
3129 src, srcline = self.state_machine.get_source_and_line(
3130 self.initial_lineno)
3131 text = '\n'.join(context)
3132 literal_block = nodes.literal_block(text, text)
3133 literal_block.source = src
3134 literal_block.line = srcline
3135 self.parent += literal_block
3136 else:
3137 self.parent += self.reporter.warning(
3138 'Literal block expected; none found.',
3139 line=self.state_machine.abs_line_number()
3140 ) # src not available, statemachine.input_lines is empty
3141 self.state_machine.previous_line()
3142 self.parent += self.messages
3143 return []
3144
3145 def indent(self, match, context, next_state):
3146 assert context, ('QuotedLiteralBlock.indent: context should not '
3147 'be empty!')
3148 self.messages.append(
3149 self.reporter.error('Unexpected indentation.',
3150 line=self.state_machine.abs_line_number()))
3151 self.state_machine.previous_line()
3152 raise EOFError
3153
3154 def initial_quoted(self, match, context, next_state):
3155 """Match arbitrary quote character on the first line only."""
3156 self.remove_transition('initial_quoted')
3157 quote = match.string[0]
3158 pattern = re.compile(re.escape(quote))
3159 # New transition matches consistent quotes only:
3160 self.add_transition('quoted',
3161 (pattern, self.quoted, self.__class__.__name__))
3162 self.initial_lineno = self.state_machine.abs_line_number()
3163 return [match.string], next_state, []
3164
3165 def quoted(self, match, context, next_state):
3166 """Match consistent quotes on subsequent lines."""
3167 context.append(match.string)
3168 return context, next_state, []
3169
3170 def text(self, match, context, next_state):
3171 if context:
3172 self.messages.append(
3173 self.reporter.error('Inconsistent literal block quoting.',
3174 line=self.state_machine.abs_line_number()))
3175 self.state_machine.previous_line()
3176 raise EOFError
3177
3178
3179state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
3180 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
3181 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
3182"""Standard set of State classes used to start `RSTStateMachine`."""