1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
6This is the ``docutils.parsers.rst.states`` module, the core of
7the reStructuredText parser. It defines the following:
8
9:Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: obsolete, use `types.SimpleNamespace`.
30
31:Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
35
36:Functions:
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
39
40:Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
42
43Parser Overview
44===============
45
46The reStructuredText parser is implemented as a recursive state machine,
47examining its input one line at a time. To understand how the parser works,
48please first become familiar with the `docutils.statemachine` module. In the
49description below, references are made to classes defined in this module;
50please see the individual classes for details.
51
52Parsing proceeds as follows:
53
541. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
58
592. The method associated with the matched transition pattern is called.
60
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
65
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
70
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
73
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
83
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
86
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
90
      - Indented: The element is a definition list item, and parsing proceeds
        similarly to step 2.B, using the `Definition` state.
93
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
97
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
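
For example, a minimal end-to-end run of this parser through the public
`docutils.parsers.rst.Parser` front end might look as follows (a sketch for
orientation only; `get_default_settings` and `new_document` come from
`docutils.frontend` and `docutils.utils`, not from this module)::

    from docutils.frontend import get_default_settings
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    parser = Parser()
    settings = get_default_settings(Parser)
    document = new_document('<sketch>', settings)
    parser.parse('A *simple* test paragraph.', document)
    # `document` is now a populated `nodes.document` tree.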
101"""
102
103from __future__ import annotations
104
105__docformat__ = 'reStructuredText'
106
107import copy
108import re
109from types import FunctionType, MethodType
110from types import SimpleNamespace as Struct
111import warnings
112
113from docutils import nodes, statemachine, utils
114from docutils import ApplicationError, DataError
115from docutils.statemachine import StateMachineWS, StateWS
116from docutils.nodes import fully_normalize_name as normalize_name
117from docutils.nodes import unescape, whitespace_normalize_name
118import docutils.parsers.rst
119from docutils.parsers.rst import directives, languages, tableparser, roles
120from docutils.utils import escape2null, column_width
121from docutils.utils import punctuation_chars, urischemes
122from docutils.utils import split_escaped_whitespace
123from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
124 RomanNumeral)
125
126TYPE_CHECKING = False
127if TYPE_CHECKING:
128 from docutils.statemachine import StringList
129
130
131class MarkupError(DataError): pass
132class UnknownInterpretedRoleError(DataError): pass
133class InterpretedRoleNotImplementedError(DataError): pass
134class ParserError(ApplicationError): pass
135class MarkupMismatch(Exception): pass
136
137
138class RSTStateMachine(StateMachineWS):
139
140 """
141 reStructuredText's master StateMachine.
142
143 The entry point to reStructuredText parsing is the `run()` method.
144 """
145
146 def run(self, input_lines, document, input_offset=0, match_titles=True,
147 inliner=None) -> None:
148 """
149 Parse `input_lines` and modify the `document` node in place.
150
151 Extend `StateMachineWS.run()`: set up parse-global data and
152 run the StateMachine.
153 """
154 self.language = languages.get_language(
155 document.settings.language_code, document.reporter)
156 self.match_titles = match_titles
157 if inliner is None:
158 inliner = Inliner()
159 inliner.init_customizations(document.settings)
160 # A collection of objects to share with nested parsers.
161 # The attributes `reporter`, `section_level`, and
162 # `section_bubble_up_kludge` will be removed in Docutils 2.0
163 self.memo = Struct(document=document,
164 reporter=document.reporter, # ignored
165 language=self.language,
166 title_styles=[],
167 section_level=0, # ignored
168 section_bubble_up_kludge=False, # ignored
169 inliner=inliner)
170 self.document = document
171 self.attach_observer(document.note_source)
172 self.reporter = self.document.reporter
173 self.node = document
174 results = StateMachineWS.run(self, input_lines, input_offset,
175 input_source=document['source'])
176 assert results == [], 'RSTStateMachine.run() results should be empty!'
177 self.node = self.memo = None # remove unneeded references
178
179
180class NestedStateMachine(StateMachineWS):
181 """
182 StateMachine run from within other StateMachine runs, to parse nested
183 document structures.
184 """
185
186 def run(self, input_lines, input_offset, memo, node, match_titles=True):
187 """
188 Parse `input_lines` and populate `node`.
189
190 Use a separate "title style hierarchy" (changed in Docutils 0.23).
191
192 Extend `StateMachineWS.run()`: set up document-wide data.
193 """
194 self.match_titles = match_titles
195 self.memo = copy.copy(memo)
196 self.document = memo.document
197 self.attach_observer(self.document.note_source)
198 self.language = memo.language
199 self.reporter = self.document.reporter
200 self.node = node
201 if match_titles:
202 # Use a separate section title style hierarchy;
203 # ensure all sections in the `input_lines` are treated as
204 # subsections of the current section by blocking lower
205 # section levels with a style that is impossible in rST:
206 self.memo.title_styles = ['x'] * len(node.section_hierarchy())
207 results = StateMachineWS.run(self, input_lines, input_offset)
208 assert results == [], ('NestedStateMachine.run() results should be '
209 'empty!')
210 return results
211
212
213class RSTState(StateWS):
214
215 """
216 reStructuredText State superclass.
217
218 Contains methods used by all State subclasses.
219 """
220
221 nested_sm = NestedStateMachine
222 nested_sm_cache = []
223
224 def __init__(self, state_machine, debug=False) -> None:
225 self.nested_sm_kwargs = {'state_classes': state_classes,
226 'initial_state': 'Body'}
227 StateWS.__init__(self, state_machine, debug)
228
229 def runtime_init(self) -> None:
230 StateWS.runtime_init(self)
231 memo = self.state_machine.memo
232 self.memo = memo
233 self.document = memo.document
234 self.inliner = memo.inliner
235 self.reporter = self.document.reporter
236 # enable the reporter to determine source and source-line
237 if not hasattr(self.reporter, 'get_source_and_line'):
238 self.reporter.get_source_and_line = self.state_machine.get_source_and_line # noqa:E501
239
240 @property
241 def parent(self) -> nodes.Element | None:
242 return self.state_machine.node
243
244 @parent.setter
245 def parent(self, value: nodes.Element):
246 self.state_machine.node = value
247
248 def goto_line(self, abs_line_offset) -> None:
249 """
250 Jump to input line `abs_line_offset`, ignoring jumps past the end.
251 """
252 try:
253 self.state_machine.goto_line(abs_line_offset)
254 except EOFError:
255 pass
256
257 def no_match(self, context, transitions):
258 """
259 Override `StateWS.no_match` to generate a system message.
260
261 This code should never be run.
262 """
263 self.reporter.severe(
264 'Internal error: no transition pattern match. State: "%s"; '
265 'transitions: %s; context: %s; current line: %r.'
266 % (self.__class__.__name__, transitions, context,
267 self.state_machine.line))
268 return context, None, []
269
270 def bof(self, context):
271 """Called at beginning of file."""
272 return [], []
273
274 def nested_parse(self,
275 block: StringList,
276 input_offset: int,
277 node: nodes.Element,
278 match_titles: bool = False,
279 state_machine_class: StateMachineWS|None = None,
280 state_machine_kwargs: dict|None = None
281 ) -> int:
282 """
283 Parse the input `block` with a nested state-machine rooted at `node`.
284
285 :block:
286 reStructuredText source extract.
287 :input_offset:
288 Line number at start of the block.
289 :node:
290 Base node. All generated nodes will be appended to this node.
291 :match_titles:
292 Allow section titles?
293 A separate section title style hierarchy is used for the nested
294 parsing (all sections are subsections of the current section).
295 The calling code should check whether sections are valid
296 children of the base node and move them or warn otherwise.
297 :state_machine_class:
298 Default: `NestedStateMachine`.
299 :state_machine_kwargs:
300 Keyword arguments for the state-machine instantiation.
301 Default: `self.nested_sm_kwargs`.
302
303 Create a new state-machine instance if required.
304 Return new offset.
305 """
306 use_default = 0
307 if state_machine_class is None:
308 state_machine_class = self.nested_sm
309 use_default += 1
310 if state_machine_kwargs is None:
311 state_machine_kwargs = self.nested_sm_kwargs
312 use_default += 1
313 my_state_machine = None
314 if use_default == 2:
315 try:
316 # get cached state machine, prevent others from using it
317 my_state_machine = self.nested_sm_cache.pop()
318 except IndexError:
319 pass
320 if not my_state_machine:
321 my_state_machine = state_machine_class(
322 debug=self.debug,
323 parent_state_machine=self.state_machine,
324 **state_machine_kwargs)
325 # run the state machine and populate `node`:
326 block_length = len(block)
327 my_state_machine.run(block, input_offset, memo=self.memo,
328 node=node, match_titles=match_titles)
329 # clean up
330 new_offset = my_state_machine.abs_line_offset()
331 if use_default == 2:
332 self.nested_sm_cache.append(my_state_machine)
333 else:
334 my_state_machine.unlink()
335 # No `block.parent` implies disconnected -- lines aren't in sync:
336 if block.parent and (len(block) - block_length) != 0:
337 # Adjustment for block if modified in nested parse:
338 self.state_machine.next_line(len(block) - block_length)
339 return new_offset
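
    # A hedged usage sketch (not exercised at runtime): directive code
    # commonly re-enters the parser through this method, e.g.
    #
    #     container = nodes.Element()
    #     self.state.nested_parse(self.content, self.content_offset,
    #                             container)
    #
    # where `self.content` is the StringList and `self.content_offset` the
    # input offset handed to a `Directive.run()` implementation.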
340
341 def nested_list_parse(self, block, input_offset, node, initial_state,
342 blank_finish,
343 blank_finish_state=None,
344 extra_settings={},
345 match_titles=False, # deprecated, will be removed
346 state_machine_class=None,
347 state_machine_kwargs=None):
348 """
349 Parse the input `block` with a nested state-machine rooted at `node`.
350
351 Create a new StateMachine rooted at `node` and run it over the
352 input `block` (see also `nested_parse()`).
353 Also keep track of optional intermediate blank lines and the
354 required final one.
355
356 Return new offset and a boolean indicating whether there was a
357 blank final line.
358 """
359 if match_titles:
360 warnings.warn('The "match_titles" argument of '
361 'parsers.rst.states.RSTState.nested_list_parse() '
362 'will be ignored in Docutils 1.0 '
363 'and removed in Docutils 2.0.',
364 PendingDeprecationWarning, stacklevel=2)
365 if state_machine_class is None:
366 state_machine_class = self.nested_sm
367 if state_machine_kwargs is None:
368 state_machine_kwargs = self.nested_sm_kwargs.copy()
369 state_machine_kwargs['initial_state'] = initial_state
370 my_state_machine = state_machine_class(
371 debug=self.debug,
372 parent_state_machine=self.state_machine,
373 **state_machine_kwargs)
374 if blank_finish_state is None:
375 blank_finish_state = initial_state
376 my_state_machine.states[blank_finish_state].blank_finish = blank_finish
377 for key, value in extra_settings.items():
378 setattr(my_state_machine.states[initial_state], key, value)
379 my_state_machine.run(block, input_offset, memo=self.memo,
380 node=node, match_titles=match_titles)
381 blank_finish = my_state_machine.states[blank_finish_state].blank_finish
382 my_state_machine.unlink()
383 return my_state_machine.abs_line_offset(), blank_finish
384
385 def section(self, title, source, style, lineno, messages) -> None:
386 """Check for a valid subsection and create one if it checks out."""
387 if self.check_subsection(source, style, lineno):
388 self.new_subsection(title, lineno, messages)
389
390 def check_subsection(self, source, style, lineno) -> bool:
391 """
392 Check for a valid subsection header. Update section data in `memo`.
393
394 When a new section is reached that isn't a subsection of the current
395 section, set `self.parent` to the new section's parent section
396 (or the root node if the new section is a top-level section).
397 """
398 title_styles = self.memo.title_styles
399 parent_sections = self.parent.section_hierarchy()
400 # current section level: (0 root, 1 section, 2 subsection, ...)
401 oldlevel = len(parent_sections)
402 # new section level:
403 try: # check for existing title style
404 newlevel = title_styles.index(style) + 1
405 except ValueError: # new title style
406 newlevel = len(title_styles) + 1
407 # The new level must not be deeper than an immediate child
408 # of the current level:
409 if newlevel > oldlevel + 1:
410 styles = ' '.join('/'.join(style) for style in title_styles)
411 self.parent += self.reporter.error(
412 'Inconsistent title style:'
413 f' skip from level {oldlevel} to {newlevel}.',
414 nodes.literal_block('', source),
415 nodes.paragraph('', f'Established title styles: {styles}'),
416 line=lineno)
417 return False
418 # Update parent state:
419 if newlevel > len(title_styles):
420 title_styles.append(style)
421 self.memo.section_level = newlevel
422 if newlevel <= oldlevel:
423 # new section is sibling or higher up in the section hierarchy
424 self.parent = parent_sections[newlevel-1].parent
425 return True
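
    # Illustration (a sketch, not executed): with established title styles
    # ['=', '-'], an underline style '-' yields level 2; a previously unseen
    # style '~' becomes level 3 and is accepted only while the current level
    # is at least 2, otherwise the "Inconsistent title style" error above is
    # reported.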
426
427 def title_inconsistent(self, sourcetext, lineno):
428 # Ignored. Will be removed in Docutils 2.0.
429 error = self.reporter.error(
430 'Title level inconsistent:', nodes.literal_block('', sourcetext),
431 line=lineno)
432 return error
433
434 def new_subsection(self, title, lineno, messages):
435 """Append new subsection to document tree."""
436 section_node = nodes.section()
437 self.parent += section_node
438 textnodes, title_messages = self.inline_text(title, lineno)
439 titlenode = nodes.title(title, '', *textnodes)
440 name = normalize_name(titlenode.astext())
441 section_node['names'].append(name)
442 section_node += titlenode
443 section_node += messages
444 section_node += title_messages
445 self.document.note_implicit_target(section_node, section_node)
446 # Update state:
447 self.parent = section_node
448
449 def paragraph(self, lines, lineno):
450 """
451 Return a list (paragraph & messages) & a boolean: literal_block next?
452 """
453 data = '\n'.join(lines).rstrip()
454 if re.search(r'(?<!\\)(\\\\)*::$', data):
455 if len(data) == 2:
456 return [], 1
457 elif data[-3] in ' \n':
458 text = data[:-3].rstrip()
459 else:
460 text = data[:-1]
461 literalnext = 1
462 else:
463 text = data
464 literalnext = 0
465 textnodes, messages = self.inline_text(text, lineno)
466 p = nodes.paragraph(data, '', *textnodes)
467 p.source, p.line = self.state_machine.get_source_and_line(lineno)
468 return [p] + messages, literalnext
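
    # Examples of the trailing '::' handling above (a sketch, not run):
    #   'Literal block::'  ->  text 'Literal block:', literal block follows
    #   'Paragraph ::'     ->  text 'Paragraph',      literal block follows
    #   '::'               ->  no paragraph,          literal block follows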
469
470 def inline_text(self, text, lineno):
471 """
472 Return 2 lists: nodes (text and inline elements), and system_messages.
473 """
474 nodes, messages = self.inliner.parse(text, lineno,
475 self.memo, self.parent)
476 return nodes, messages
477
478 def unindent_warning(self, node_name):
479 # the actual problem is one line below the current line
480 lineno = self.state_machine.abs_line_number() + 1
481 return self.reporter.warning('%s ends without a blank line; '
482 'unexpected unindent.' % node_name,
483 line=lineno)
484
485
486def build_regexp(definition, compile_patterns=True):
487 """
488 Build, compile and return a regular expression based on `definition`.
489
490 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
491 where "parts" is a list of regular expressions and/or regular
492 expression definitions to be joined into an or-group.
493 """
494 name, prefix, suffix, parts = definition
495 part_strings = []
496 for part in parts:
497 if isinstance(part, tuple):
498 part_strings.append(build_regexp(part, None))
499 else:
500 part_strings.append(part)
501 or_group = '|'.join(part_strings)
502 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
503 if compile_patterns:
504 return re.compile(regexp)
505 else:
506 return regexp
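
# A hedged illustration (not used at runtime): the nested definition
#     ('initial', '', '', [r'\*\*', ('nested', '<', '>', ['a', 'b'])])
# builds the pattern
#     (?P<initial>\*\*|<(?P<nested>a|b)>)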
507
508
509class Inliner:
510
511 """
512 Parse inline markup; call the `parse()` method.
513 """
514
515 def __init__(self) -> None:
516 self.implicit_dispatch = []
517 """List of (pattern, bound method) tuples, used by
518 `self.implicit_inline`."""
519
520 def init_customizations(self, settings) -> None:
521 # lookahead and look-behind expressions for inline markup rules
522 if getattr(settings, 'character_level_inline_markup', False):
523 start_string_prefix = '(^|(?<!\x00))'
524 end_string_suffix = ''
525 else:
526 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
527 (punctuation_chars.openers,
528 punctuation_chars.delimiters))
529 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
530 (punctuation_chars.closing_delimiters,
531 punctuation_chars.delimiters,
532 punctuation_chars.closers))
533 args = locals().copy()
534 args.update(vars(self.__class__))
535
536 parts = ('initial_inline', start_string_prefix, '',
537 [
538 ('start', '', self.non_whitespace_after, # simple start-strings
539 [r'\*\*', # strong
540 r'\*(?!\*)', # emphasis but not strong
541 r'``', # literal
542 r'_`', # inline internal target
543 r'\|(?!\|)'] # substitution reference
544 ),
545 ('whole', '', end_string_suffix, # whole constructs
546 [ # reference name & end-string
547 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
548 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
549 [r'[0-9]+', # manually numbered
550 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)
551 r'\*', # auto-symbol
552 r'(?P<citationlabel>%s)' % self.simplename, # citation ref
553 ]
554 )
555 ]
556 ),
557 ('backquote', # interpreted text or phrase reference
558 '(?P<role>(:%s:)?)' % self.simplename, # optional role
559 self.non_whitespace_after,
560 ['`(?!`)'] # but not literal
561 )
562 ]
563 )
564 self.start_string_prefix = start_string_prefix
565 self.end_string_suffix = end_string_suffix
566 self.parts = parts
567
568 self.patterns = Struct(
569 initial=build_regexp(parts),
570 emphasis=re.compile(self.non_whitespace_escape_before
571 + r'(\*)' + end_string_suffix),
572 strong=re.compile(self.non_whitespace_escape_before
573 + r'(\*\*)' + end_string_suffix),
574 interpreted_or_phrase_ref=re.compile(
575 r"""
576 %(non_unescaped_whitespace_escape_before)s
577 (
578 `
579 (?P<suffix>
580 (?P<role>:%(simplename)s:)?
581 (?P<refend>__?)?
582 )
583 )
584 %(end_string_suffix)s
585 """ % args, re.VERBOSE),
586 embedded_link=re.compile(
587 r"""
588 (
589 (?:[ \n]+|^) # spaces or beginning of line/string
590 < # open bracket
591 %(non_whitespace_after)s
592 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
593 %(non_whitespace_escape_before)s
594 > # close bracket
595 )
596 $ # end of string
597 """ % args, re.VERBOSE),
598 literal=re.compile(self.non_whitespace_before + '(``)'
599 + end_string_suffix),
600 target=re.compile(self.non_whitespace_escape_before
601 + r'(`)' + end_string_suffix),
602 substitution_ref=re.compile(self.non_whitespace_escape_before
603 + r'(\|_{0,2})'
604 + end_string_suffix),
605 email=re.compile(self.email_pattern % args + '$',
606 re.VERBOSE),
607 uri=re.compile(
608 (r"""
609 %(start_string_prefix)s
610 (?P<whole>
611 (?P<absolute> # absolute URI
612 (?P<scheme> # scheme (http, ftp, mailto)
613 [a-zA-Z][a-zA-Z0-9.+-]*
614 )
615 :
616 (
617 ( # either:
618 (//?)? # hierarchical URI
619 %(uric)s* # URI characters
620 %(uri_end)s # final URI char
621 )
622 ( # optional query
623 \?%(uric)s*
624 %(uri_end)s
625 )?
626 ( # optional fragment
627 \#%(uric)s*
628 %(uri_end)s
629 )?
630 )
631 )
632 | # *OR*
633 (?P<email> # email address
634 """ + self.email_pattern + r"""
635 )
636 )
637 %(end_string_suffix)s
638 """) % args, re.VERBOSE),
639 pep=re.compile(
640 r"""
641 %(start_string_prefix)s
642 (
643 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
644 |
645 (PEP\s+(?P<pepnum2>\d+)) # reference by name
646 )
647 %(end_string_suffix)s""" % args, re.VERBOSE),
648 rfc=re.compile(
649 r"""
650 %(start_string_prefix)s
651 (RFC(-|\s+)?(?P<rfcnum>\d+))
652 %(end_string_suffix)s""" % args, re.VERBOSE))
653
654 self.implicit_dispatch.append((self.patterns.uri,
655 self.standalone_uri))
656 if settings.pep_references:
657 self.implicit_dispatch.append((self.patterns.pep,
658 self.pep_reference))
659 if settings.rfc_references:
660 self.implicit_dispatch.append((self.patterns.rfc,
661 self.rfc_reference))
662
663 def parse(self, text, lineno, memo, parent):
664 # Needs to be refactored for nested inline markup.
665 # Add nested_parse() method?
666 """
667 Return 2 lists: nodes (text and inline elements), and system_messages.
668
669 Using `self.patterns.initial`, a pattern which matches start-strings
670 (emphasis, strong, interpreted, phrase reference, literal,
671 substitution reference, and inline target) and complete constructs
672 (simple reference, footnote reference), search for a candidate. When
673 one is found, check for validity (e.g., not a quoted '*' character).
674 If valid, search for the corresponding end string if applicable, and
675 check it for validity. If not found or invalid, generate a warning
676 and ignore the start-string. Implicit inline markup (e.g. standalone
677 URIs) is found last.
678
679 :text: source string
680 :lineno: absolute line number, cf. `statemachine.get_source_and_line()`
681 """
682 self.document = memo.document
683 self.language = memo.language
684 self.reporter = self.document.reporter
685 self.parent = parent
686 pattern_search = self.patterns.initial.search
687 dispatch = self.dispatch
688 remaining = escape2null(text)
689 processed = []
690 unprocessed = []
691 messages = []
692 while remaining:
693 match = pattern_search(remaining)
694 if match:
695 groups = match.groupdict()
696 method = dispatch[groups['start'] or groups['backquote']
697 or groups['refend'] or groups['fnend']]
698 before, inlines, remaining, sysmessages = method(self, match,
699 lineno)
700 unprocessed.append(before)
701 messages += sysmessages
702 if inlines:
703 processed += self.implicit_inline(''.join(unprocessed),
704 lineno)
705 processed += inlines
706 unprocessed = []
707 else:
708 break
709 remaining = ''.join(unprocessed) + remaining
710 if remaining:
711 processed += self.implicit_inline(remaining, lineno)
712 return processed, messages
713
714 # Inline object recognition
715 # -------------------------
716 # See also init_customizations().
717 non_whitespace_before = r'(?<!\s)'
718 non_whitespace_escape_before = r'(?<![\s\x00])'
719 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
720 non_whitespace_after = r'(?!\s)'
721 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
722 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
723 # Valid URI characters (see RFC 2396 & RFC 2732);
724 # final \x00 allows backslash escapes in URIs:
725 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
726 # Delimiter indicating the end of a URI (not part of the URI):
727 uri_end_delim = r"""[>]"""
728 # Last URI character; same as uric but no punctuation:
729 urilast = r"""[_~*/=+a-zA-Z0-9]"""
730 # End of a URI (either 'urilast' or 'uric followed by a
731 # uri_end_delim'):
732 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
733 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
734 email_pattern = r"""
735 %(emailc)s+(?:\.%(emailc)s+)* # name
736 (?<!\x00)@ # at
737 %(emailc)s+(?:\.%(emailc)s*)* # host
738 %(uri_end)s # final URI char
739 """
740
741 def quoted_start(self, match):
742 """Test if inline markup start-string is 'quoted'.
743
744 'Quoted' in this context means the start-string is enclosed in a pair
745 of matching opening/closing delimiters (not necessarily quotes)
746 or at the end of the match.
747 """
748 string = match.string
749 start = match.start()
750 if start == 0: # start-string at beginning of text
751 return False
752 prestart = string[start - 1]
753 try:
754 poststart = string[match.end()]
755 except IndexError: # start-string at end of text
756 return True # not "quoted" but no markup start-string either
757 return punctuation_chars.match_chars(prestart, poststart)
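
    # For example (a sketch, not executed): in the text  "*"  the asterisk
    # start-string is enclosed in a matching pair of quote characters, so it
    # is "quoted" and does not open emphasis.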
758
759 def inline_obj(self, match, lineno, end_pattern, nodeclass,
760 restore_backslashes=False):
761 string = match.string
762 matchstart = match.start('start')
763 matchend = match.end('start')
764 if self.quoted_start(match):
765 return string[:matchend], [], string[matchend:], [], ''
766 endmatch = end_pattern.search(string[matchend:])
767 if endmatch and endmatch.start(1): # 1 or more chars
768 text = endmatch.string[:endmatch.start(1)]
769 if restore_backslashes:
770 text = unescape(text, True)
771 textend = matchend + endmatch.end(1)
772 rawsource = unescape(string[matchstart:textend], True)
773 node = nodeclass(rawsource, text)
774 return (string[:matchstart], [node],
775 string[textend:], [], endmatch.group(1))
776 msg = self.reporter.warning(
777 'Inline %s start-string without end-string.'
778 % nodeclass.__name__, line=lineno)
779 text = unescape(string[matchstart:matchend], True)
780 prb = self.problematic(text, text, msg)
781 return string[:matchstart], [prb], string[matchend:], [msg], ''
782
783 def problematic(self, text, rawsource, message):
784 msgid = self.document.set_id(message, self.parent)
785 problematic = nodes.problematic(rawsource, text, refid=msgid)
786 prbid = self.document.set_id(problematic)
787 message.add_backref(prbid)
788 return problematic
789
790 def emphasis(self, match, lineno):
791 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
792 match, lineno, self.patterns.emphasis, nodes.emphasis)
793 return before, inlines, remaining, sysmessages
794
795 def strong(self, match, lineno):
796 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
797 match, lineno, self.patterns.strong, nodes.strong)
798 return before, inlines, remaining, sysmessages
799
800 def interpreted_or_phrase_ref(self, match, lineno):
801 end_pattern = self.patterns.interpreted_or_phrase_ref
802 string = match.string
803 matchstart = match.start('backquote')
804 matchend = match.end('backquote')
805 rolestart = match.start('role')
806 role = match.group('role')
807 position = ''
808 if role:
809 role = role[1:-1]
810 position = 'prefix'
811 elif self.quoted_start(match):
812 return string[:matchend], [], string[matchend:], []
813 endmatch = end_pattern.search(string[matchend:])
814 if endmatch and endmatch.start(1): # 1 or more chars
815 textend = matchend + endmatch.end()
816 if endmatch.group('role'):
817 if role:
818 msg = self.reporter.warning(
819 'Multiple roles in interpreted text (both '
820 'prefix and suffix present; only one allowed).',
821 line=lineno)
822 text = unescape(string[rolestart:textend], True)
823 prb = self.problematic(text, text, msg)
824 return string[:rolestart], [prb], string[textend:], [msg]
825 role = endmatch.group('suffix')[1:-1]
826 position = 'suffix'
827 escaped = endmatch.string[:endmatch.start(1)]
828 rawsource = unescape(string[matchstart:textend], True)
829 if rawsource[-1:] == '_':
830 if role:
831 msg = self.reporter.warning(
832 'Mismatch: both interpreted text role %s and '
833 'reference suffix.' % position, line=lineno)
834 text = unescape(string[rolestart:textend], True)
835 prb = self.problematic(text, text, msg)
836 return string[:rolestart], [prb], string[textend:], [msg]
837 return self.phrase_ref(string[:matchstart], string[textend:],
838 rawsource, escaped)
839 else:
840 rawsource = unescape(string[rolestart:textend], True)
841 nodelist, messages = self.interpreted(rawsource, escaped, role,
842 lineno)
843 return (string[:rolestart], nodelist,
844 string[textend:], messages)
845 msg = self.reporter.warning(
846 'Inline interpreted text or phrase reference start-string '
847 'without end-string.', line=lineno)
848 text = unescape(string[matchstart:matchend], True)
849 prb = self.problematic(text, text, msg)
850 return string[:matchstart], [prb], string[matchend:], [msg]
851
852 def phrase_ref(self, before, after, rawsource, escaped, text=None):
853 # `text` is ignored (since 0.16)
854 match = self.patterns.embedded_link.search(escaped)
855 if match: # embedded <URI> or <alias_>
856 text = escaped[:match.start(0)]
857 unescaped = unescape(text)
858 rawtext = unescape(text, True)
859 aliastext = match.group(2)
860 rawaliastext = unescape(aliastext, True)
861 underscore_escaped = rawaliastext.endswith(r'\_')
862 if (aliastext.endswith('_')
863 and not (underscore_escaped
864 or self.patterns.uri.match(aliastext))):
865 aliastype = 'name'
866 alias = normalize_name(unescape(aliastext[:-1]))
867 target = nodes.target(match.group(1), refname=alias)
868 target.indirect_reference_name = whitespace_normalize_name(
869 unescape(aliastext[:-1]))
870 else:
871 aliastype = 'uri'
872 # remove unescaped whitespace
873 alias_parts = split_escaped_whitespace(match.group(2))
874 alias = ' '.join(''.join(part.split())
875 for part in alias_parts)
876 alias = self.adjust_uri(unescape(alias))
877 if alias.endswith(r'\_'):
878 alias = alias[:-2] + '_'
879 target = nodes.target(match.group(1), refuri=alias)
880 target.referenced = 1
881 if not aliastext:
882 raise ApplicationError('problem with embedded link: %r'
883 % aliastext)
884 if not text:
885 text = alias
886 unescaped = unescape(text)
887 rawtext = rawaliastext
888 else:
889 text = escaped
890 unescaped = unescape(text)
891 target = None
892 rawtext = unescape(escaped, True)
893
894 refname = normalize_name(unescaped)
895 reference = nodes.reference(rawsource, text,
896 name=whitespace_normalize_name(unescaped))
897 reference[0].rawsource = rawtext
898
899 node_list = [reference]
900
901 if rawsource[-2:] == '__':
902 if target and (aliastype == 'name'):
903 reference['refname'] = alias
904 self.document.note_refname(reference)
905 # self.document.note_indirect_target(target) # required?
906 elif target and (aliastype == 'uri'):
907 reference['refuri'] = alias
908 else:
909 reference['anonymous'] = True
910 else:
911 if target:
912 target['names'].append(refname)
913 if aliastype == 'name':
914 reference['refname'] = alias
915 self.document.note_indirect_target(target)
916 self.document.note_refname(reference)
917 else:
918 reference['refuri'] = alias
919 # target.note_referenced_by(name=refname)
920 self.document.note_implicit_target(target, self.parent)
921 node_list.append(target)
922 else:
923 reference['refname'] = refname
924 self.document.note_refname(reference)
925 return before, node_list, after, []
926
927 def adjust_uri(self, uri):
928 match = self.patterns.email.match(uri)
929 if match:
930 return 'mailto:' + uri
931 else:
932 return uri
933
934 def interpreted(self, rawsource, text, role, lineno):
935 role_fn, messages = roles.role(role, self.language, lineno,
936 self.reporter)
937 if role_fn:
938 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
939 return nodes, messages + messages2
940 else:
941 msg = self.reporter.error(
942 'Unknown interpreted text role "%s".' % role,
943 line=lineno)
944 return ([self.problematic(rawsource, rawsource, msg)],
945 messages + [msg])
946
947 def literal(self, match, lineno):
948 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
949 match, lineno, self.patterns.literal, nodes.literal,
950 restore_backslashes=True)
951 return before, inlines, remaining, sysmessages
952
953 def inline_internal_target(self, match, lineno):
954 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
955 match, lineno, self.patterns.target, nodes.target)
956 if inlines and isinstance(inlines[0], nodes.target):
957 assert len(inlines) == 1
958 target = inlines[0]
959 name = normalize_name(target.astext())
960 target['names'].append(name)
961 self.document.note_explicit_target(target, self.parent)
962 return before, inlines, remaining, sysmessages
963
964 def substitution_reference(self, match, lineno):
965 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
966 match, lineno, self.patterns.substitution_ref,
967 nodes.substitution_reference)
968 if len(inlines) == 1:
969 subref_node = inlines[0]
970 if isinstance(subref_node, nodes.substitution_reference):
971 subref_text = subref_node.astext()
972 self.document.note_substitution_ref(subref_node, subref_text)
973 if endstring[-1:] == '_':
974 reference_node = nodes.reference(
975 '|%s%s' % (subref_text, endstring), '')
976 if endstring[-2:] == '__':
977 reference_node['anonymous'] = True
978 else:
979 reference_node['refname'] = normalize_name(subref_text)
980 self.document.note_refname(reference_node)
981 reference_node += subref_node
982 inlines = [reference_node]
983 return before, inlines, remaining, sysmessages
984
985 def footnote_reference(self, match, lineno):
986 """
987 Handles `nodes.footnote_reference` and `nodes.citation_reference`
988 elements.
989 """
990 label = match.group('footnotelabel')
991 refname = normalize_name(label)
992 string = match.string
993 before = string[:match.start('whole')]
994 remaining = string[match.end('whole'):]
995 if match.group('citationlabel'):
996 refnode = nodes.citation_reference('[%s]_' % label,
997 refname=refname)
998 refnode += nodes.Text(label)
999 self.document.note_citation_ref(refnode)
1000 else:
1001 refnode = nodes.footnote_reference('[%s]_' % label)
1002 if refname[0] == '#':
1003 refname = refname[1:]
1004 refnode['auto'] = 1
1005 self.document.note_autofootnote_ref(refnode)
1006 elif refname == '*':
1007 refname = ''
1008 refnode['auto'] = '*'
                self.document.note_symbol_footnote_ref(refnode)
1011 else:
1012 refnode += nodes.Text(label)
1013 if refname:
1014 refnode['refname'] = refname
1015 self.document.note_footnote_ref(refnode)
1016 if utils.get_trim_footnote_ref_space(self.document.settings):
1017 before = before.rstrip()
1018 return before, [refnode], remaining, []
1019
1020 def reference(self, match, lineno, anonymous=False):
1021 referencename = match.group('refname')
1022 refname = normalize_name(referencename)
1023 referencenode = nodes.reference(
1024 referencename + match.group('refend'), referencename,
1025 name=whitespace_normalize_name(referencename))
1026 referencenode[0].rawsource = referencename
1027 if anonymous:
1028 referencenode['anonymous'] = True
1029 else:
1030 referencenode['refname'] = refname
1031 self.document.note_refname(referencenode)
1032 string = match.string
1033 matchstart = match.start('whole')
1034 matchend = match.end('whole')
1035 return string[:matchstart], [referencenode], string[matchend:], []
1036
1037 def anonymous_reference(self, match, lineno):
1038 return self.reference(match, lineno, anonymous=True)
1039
1040 def standalone_uri(self, match, lineno):
1041 if (not match.group('scheme')
1042 or match.group('scheme').lower() in urischemes.schemes):
1043 if match.group('email'):
1044 addscheme = 'mailto:'
1045 else:
1046 addscheme = ''
1047 text = match.group('whole')
1048 refuri = addscheme + unescape(text)
1049 reference = nodes.reference(unescape(text, True), text,
1050 refuri=refuri)
1051 return [reference]
1052 else: # not a valid scheme
1053 raise MarkupMismatch
1054
1055 def pep_reference(self, match, lineno):
1056 text = match.group(0)
1057 if text.startswith('pep-'):
1058 pepnum = int(unescape(match.group('pepnum1')))
1059 elif text.startswith('PEP'):
1060 pepnum = int(unescape(match.group('pepnum2')))
1061 else:
1062 raise MarkupMismatch
1063 ref = (self.document.settings.pep_base_url
1064 + self.document.settings.pep_file_url_template % pepnum)
1065 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1066
1067 rfc_url = 'rfc%d.html'
1068
1069 def rfc_reference(self, match, lineno):
1070 text = match.group(0)
1071 if text.startswith('RFC'):
1072 rfcnum = int(unescape(match.group('rfcnum')))
1073 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
1074 else:
1075 raise MarkupMismatch
1076 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1077
1078 def implicit_inline(self, text, lineno):
1079 """
1080 Check each of the patterns in `self.implicit_dispatch` for a match,
1081 and dispatch to the stored method for the pattern. Recursively check
1082 the text before and after the match. Return a list of `nodes.Text`
1083 and inline element nodes.
1084 """
1085 if not text:
1086 return []
1087 for pattern, method in self.implicit_dispatch:
1088 match = pattern.search(text)
1089 if match:
1090 try:
1091 # Must recurse on strings before *and* after the match;
1092 # there may be multiple patterns.
1093 return (self.implicit_inline(text[:match.start()], lineno)
1094 + method(match, lineno)
1095 + self.implicit_inline(text[match.end():], lineno))
1096 except MarkupMismatch:
1097 pass
1098 return [nodes.Text(text)]
1099
1100 dispatch = {'*': emphasis,
1101 '**': strong,
1102 '`': interpreted_or_phrase_ref,
1103 '``': literal,
1104 '_`': inline_internal_target,
1105 ']_': footnote_reference,
1106 '|': substitution_reference,
1107 '_': reference,
1108 '__': anonymous_reference}
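
    # The keys above are the start-strings ('_', '__' and ']_' are
    # end-strings) captured by `self.patterns.initial`; `parse()` uses this
    # mapping to select the handler method for each match.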
1109
1110
1111def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1112 return ord(s) - _zero
1113
1114
1115def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1116 return ord(s) - _zero
1117
1118
1119class Body(RSTState):
1120
1121 """
1122 Generic classifier of the first line of a block.
1123 """
1124
1125 double_width_pad_char = tableparser.TableParser.double_width_pad_char
1126 """Padding character for East Asian double-width text."""
1127
1128 enum = Struct()
1129 """Enumerated list parsing information."""
1130
1131 enum.formatinfo = {
1132 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1133 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1134 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1135 enum.formats = enum.formatinfo.keys()
1136 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1137 'lowerroman', 'upperroman'] # ORDERED!
1138 enum.sequencepats = {'arabic': '[0-9]+',
1139 'loweralpha': '[a-z]',
1140 'upperalpha': '[A-Z]',
1141 'lowerroman': '[ivxlcdm]+',
1142 'upperroman': '[IVXLCDM]+'}
1143 enum.converters = {'arabic': int,
1144 'loweralpha': _loweralpha_to_int,
1145 'upperalpha': _upperalpha_to_int,
1146 'lowerroman': RomanNumeral.from_string,
1147 'upperroman': RomanNumeral.from_string}
1148
1149 enum.sequenceregexps = {}
1150 for sequence in enum.sequences:
1151 enum.sequenceregexps[sequence] = re.compile(
1152 enum.sequencepats[sequence] + '$')
1153
1154 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1155 """Matches the top (& bottom) of a full table)."""
1156
1157 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1158 """Matches the top of a simple table."""
1159
1160 simple_table_border_pat = re.compile('=+[ =]*$')
1161 """Matches the bottom & header bottom of a simple table."""
1162
1163 pats = {}
1164 """Fragments of patterns used by transitions."""
1165
1166 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1167 pats['alpha'] = '[a-zA-Z]'
1168 pats['alphanum'] = '[a-zA-Z0-9]'
1169 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1170 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1171 '|%(upperroman)s|#)' % enum.sequencepats)
1172 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1173 # @@@ Loosen up the pattern? Allow Unicode?
1174 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1175 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1176 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1177 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1178
1179 for format in enum.formats:
1180 pats[format] = '(?P<%s>%s%s%s)' % (
1181 format, re.escape(enum.formatinfo[format].prefix),
1182 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1183
1184 patterns = {
1185 'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
1186 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1187 'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
1188 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1189 'doctest': r'>>>( +|$)',
1190 'line_block': r'\|( +|$)',
1191 'grid_table_top': grid_table_top_pat,
1192 'simple_table_top': simple_table_top_pat,
1193 'explicit_markup': r'\.\.( +|$)',
1194 'anonymous': r'__( +|$)',
1195 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1196 'text': r''}
1197 initial_transitions = (
1198 'bullet',
1199 'enumerator',
1200 'field_marker',
1201 'option_marker',
1202 'doctest',
1203 'line_block',
1204 'grid_table_top',
1205 'simple_table_top',
1206 'explicit_markup',
1207 'anonymous',
1208 'line',
1209 'text')
1210
1211 def indent(self, match, context, next_state):
1212 """Block quote."""
1213 (indented, indent, line_offset, blank_finish
1214 ) = self.state_machine.get_indented()
1215 elements = self.block_quote(indented, line_offset)
1216 self.parent += elements
1217 if not blank_finish:
1218 self.parent += self.unindent_warning('Block quote')
1219 return context, next_state, []
1220
1221 def block_quote(self, indented, line_offset):
1222 elements = []
1223 while indented:
1224 blockquote = nodes.block_quote(rawsource='\n'.join(indented))
1225 (blockquote.source, blockquote.line
1226 ) = self.state_machine.get_source_and_line(line_offset+1)
1227 (blockquote_lines,
1228 attribution_lines,
1229 attribution_offset,
1230 indented,
1231 new_line_offset) = self.split_attribution(indented, line_offset)
1232 self.nested_parse(blockquote_lines, line_offset, blockquote)
1233 elements.append(blockquote)
1234 if attribution_lines:
1235 attribution, messages = self.parse_attribution(
1236 attribution_lines, line_offset+attribution_offset)
1237 blockquote += attribution
1238 elements += messages
1239 line_offset = new_line_offset
1240 while indented and not indented[0]:
1241 indented = indented[1:]
1242 line_offset += 1
1243 return elements
1244
1245 # U+2014 is an em-dash:
1246 attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
1247
1248 def split_attribution(self, indented, line_offset):
1249 """
1250 Check for a block quote attribution and split it off:
1251
1252 * First line after a blank line must begin with a dash ("--", "---",
1253 em-dash; matches `self.attribution_pattern`).
1254 * Every line after that must have consistent indentation.
1255 * Attributions must be preceded by block quote content.
1256
1257 Return a tuple of: (block quote content lines, attribution lines,
1258 attribution offset, remaining indented lines, remaining lines offset).
1259 """
1260 blank = None
1261 nonblank_seen = False
1262 for i in range(len(indented)):
1263 line = indented[i].rstrip()
1264 if line:
1265 if nonblank_seen and blank == i - 1: # last line blank
1266 match = self.attribution_pattern.match(line)
1267 if match:
1268 attribution_end, indent = self.check_attribution(
1269 indented, i)
1270 if attribution_end:
1271 a_lines = indented[i:attribution_end]
1272 a_lines.trim_left(match.end(), end=1)
1273 a_lines.trim_left(indent, start=1)
1274 return (indented[:i], a_lines,
1275 i, indented[attribution_end:],
1276 line_offset + attribution_end)
1277 nonblank_seen = True
1278 else:
1279 blank = i
1280 else:
1281 return indented, None, None, None, None
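
    # For example (a sketch, not executed): given the indented block
    #
    #     no man is an island
    #
    #     -- John Donne
    #
    # the first line is returned as block quote content and the trailing
    # "-- John Donne" line is split off as the attribution.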
1282
1283 def check_attribution(self, indented, attribution_start):
1284 """
1285 Check attribution shape.
1286 Return the index past the end of the attribution, and the indent.
1287 """
1288 indent = None
1289 i = attribution_start + 1
1290 for i in range(attribution_start + 1, len(indented)):
1291 line = indented[i].rstrip()
1292 if not line:
1293 break
1294 if indent is None:
1295 indent = len(line) - len(line.lstrip())
1296 elif len(line) - len(line.lstrip()) != indent:
1297 return None, None # bad shape; not an attribution
1298 else:
1299 # return index of line after last attribution line:
1300 i += 1
1301 return i, (indent or 0)
1302
1303 def parse_attribution(self, indented, line_offset):
1304 text = '\n'.join(indented).rstrip()
1305 lineno = 1 + line_offset # line_offset is zero-based
1306 textnodes, messages = self.inline_text(text, lineno)
1307 node = nodes.attribution(text, '', *textnodes)
1308 node.source, node.line = self.state_machine.get_source_and_line(lineno)
1309 return node, messages
1310
1311 def bullet(self, match, context, next_state):
1312 """Bullet list item."""
1313 ul = nodes.bullet_list()
1314 ul.source, ul.line = self.state_machine.get_source_and_line()
1315 self.parent += ul
1316 ul['bullet'] = match.string[0]
1317 i, blank_finish = self.list_item(match.end())
1318 ul += i
1319 offset = self.state_machine.line_offset + 1 # next line
1320 new_line_offset, blank_finish = self.nested_list_parse(
1321 self.state_machine.input_lines[offset:],
1322 input_offset=self.state_machine.abs_line_offset() + 1,
1323 node=ul, initial_state='BulletList',
1324 blank_finish=blank_finish)
1325 self.goto_line(new_line_offset)
1326 if not blank_finish:
1327 self.parent += self.unindent_warning('Bullet list')
1328 return [], next_state, []
1329
1330 def list_item(self, indent):
1331 src, srcline = self.state_machine.get_source_and_line()
1332 if self.state_machine.line[indent:]:
1333 indented, line_offset, blank_finish = (
1334 self.state_machine.get_known_indented(indent))
1335 else:
1336 indented, indent, line_offset, blank_finish = (
1337 self.state_machine.get_first_known_indented(indent))
1338 listitem = nodes.list_item('\n'.join(indented))
1339 listitem.source, listitem.line = src, srcline
1340 if indented:
1341 self.nested_parse(indented, input_offset=line_offset,
1342 node=listitem)
1343 return listitem, blank_finish
1344
1345 def enumerator(self, match, context, next_state):
1346 """Enumerated List Item"""
1347 format, sequence, text, ordinal = self.parse_enumerator(match)
1348 if not self.is_enumerated_list_item(ordinal, sequence, format):
1349 raise statemachine.TransitionCorrection('text')
1350 enumlist = nodes.enumerated_list()
1351 (enumlist.source,
1352 enumlist.line) = self.state_machine.get_source_and_line()
1353 self.parent += enumlist
1354 if sequence == '#':
1355 enumlist['enumtype'] = 'arabic'
1356 else:
1357 enumlist['enumtype'] = sequence
1358 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1359 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1360 if ordinal != 1:
1361 enumlist['start'] = ordinal
1362 msg = self.reporter.info(
1363 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1364 % (text, ordinal), base_node=enumlist)
1365 self.parent += msg
1366 listitem, blank_finish = self.list_item(match.end())
1367 enumlist += listitem
1368 offset = self.state_machine.line_offset + 1 # next line
1369 newline_offset, blank_finish = self.nested_list_parse(
1370 self.state_machine.input_lines[offset:],
1371 input_offset=self.state_machine.abs_line_offset() + 1,
1372 node=enumlist, initial_state='EnumeratedList',
1373 blank_finish=blank_finish,
1374 extra_settings={'lastordinal': ordinal,
1375 'format': format,
1376 'auto': sequence == '#'})
1377 self.goto_line(newline_offset)
1378 if not blank_finish:
1379 self.parent += self.unindent_warning('Enumerated list')
1380 return [], next_state, []
1381
1382 def parse_enumerator(self, match, expected_sequence=None):
1383 """
1384 Analyze an enumerator and return the results.
1385
1386 :Return:
1387 - the enumerator format ('period', 'parens', or 'rparen'),
1388 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1389 - the text of the enumerator, stripped of formatting, and
1390 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1391 ``None`` is returned for invalid enumerator text).
1392
1393 The enumerator format has already been determined by the regular
1394 expression match. If `expected_sequence` is given, that sequence is
1395 tried first. If not, we check for Roman numeral 1. This way,
1396 single-character Roman numerals (which are also alphabetical) can be
1397 matched. If no sequence has been matched, all sequences are checked in
1398 order.
1399 """
1400 groupdict = match.groupdict()
1401 sequence = ''
1402 for format in self.enum.formats:
1403 if groupdict[format]: # was this the format matched?
1404 break # yes; keep `format`
1405 else: # shouldn't happen
1406 raise ParserError('enumerator format not matched')
1407 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1408 : self.enum.formatinfo[format].end]
1409 if text == '#':
1410 sequence = '#'
1411 elif expected_sequence:
1412 try:
1413 if self.enum.sequenceregexps[expected_sequence].match(text):
1414 sequence = expected_sequence
1415 except KeyError: # shouldn't happen
1416 raise ParserError('unknown enumerator sequence: %s'
1417 % sequence)
1418 elif text == 'i':
1419 sequence = 'lowerroman'
1420 elif text == 'I':
1421 sequence = 'upperroman'
1422 if not sequence:
1423 for sequence in self.enum.sequences:
1424 if self.enum.sequenceregexps[sequence].match(text):
1425 break
1426 else: # shouldn't happen
1427 raise ParserError('enumerator sequence not matched')
1428 if sequence == '#':
1429 ordinal = 1
1430 else:
1431 try:
1432 ordinal = int(self.enum.converters[sequence](text))
1433 except InvalidRomanNumeralError:
1434 ordinal = None
1435 return format, sequence, text, ordinal
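
    # Examples (a sketch, not executed):
    #   '3.'  -> ('period', 'arabic',     '3', 3)
    #   '(a)' -> ('parens', 'loweralpha', 'a', 1)
    #   '#)'  -> ('rparen', '#',          '#', 1)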
1436
1437 def is_enumerated_list_item(self, ordinal, sequence, format):
1438 """
1439 Check validity based on the ordinal value and the second line.
1440
1441 Return true if the ordinal is valid and the second line is blank,
1442 indented, or starts with the next enumerator or an auto-enumerator.
1443 """
1444 if ordinal is None:
1445 return None
1446 try:
1447 next_line = self.state_machine.next_line()
1448 except EOFError: # end of input lines
1449 self.state_machine.previous_line()
1450 return 1
1451 else:
1452 self.state_machine.previous_line()
1453 if not next_line[:1].strip(): # blank or indented
1454 return 1
1455 result = self.make_enumerator(ordinal + 1, sequence, format)
1456 if result:
1457 next_enumerator, auto_enumerator = result
1458 try:
1459 if next_line.startswith((next_enumerator, auto_enumerator)):
1460 return 1
1461 except TypeError:
1462 pass
1463 return None
1464
1465 def make_enumerator(self, ordinal, sequence, format):
1466 """
1467 Construct and return the next enumerated list item marker, and an
1468 auto-enumerator ("#" instead of the regular enumerator).
1469
1470 Return ``None`` for invalid (out of range) ordinals.
1471 """
1472 if sequence == '#':
1473 enumerator = '#'
1474 elif sequence == 'arabic':
1475 enumerator = str(ordinal)
1476 else:
1477 if sequence.endswith('alpha'):
1478 if ordinal > 26:
1479 return None
1480 enumerator = chr(ordinal + ord('a') - 1)
1481 elif sequence.endswith('roman'):
1482 try:
1483 enumerator = RomanNumeral(ordinal).to_uppercase()
1484 except TypeError:
1485 return None
1486 else: # shouldn't happen
1487 raise ParserError('unknown enumerator sequence: "%s"'
1488 % sequence)
1489 if sequence.startswith('lower'):
1490 enumerator = enumerator.lower()
1491 elif sequence.startswith('upper'):
1492 enumerator = enumerator.upper()
1493 else: # shouldn't happen
1494 raise ParserError('unknown enumerator sequence: "%s"'
1495 % sequence)
1496 formatinfo = self.enum.formatinfo[format]
1497 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1498 + ' ')
1499 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1500 return next_enumerator, auto_enumerator
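
    # Examples (a sketch, not executed):
    #   make_enumerator(3, 'loweralpha', 'parens')  -> ('(c) ', '(#) ')
    #   make_enumerator(4, 'upperroman', 'period')  -> ('IV. ', '#. ')
    #   make_enumerator(27, 'loweralpha', 'period') -> None  (out of range)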
1501
1502 def field_marker(self, match, context, next_state):
1503 """Field list item."""
1504 field_list = nodes.field_list()
1505 self.parent += field_list
1506 field, blank_finish = self.field(match)
1507 field_list += field
1508 offset = self.state_machine.line_offset + 1 # next line
1509 newline_offset, blank_finish = self.nested_list_parse(
1510 self.state_machine.input_lines[offset:],
1511 input_offset=self.state_machine.abs_line_offset() + 1,
1512 node=field_list, initial_state='FieldList',
1513 blank_finish=blank_finish)
1514 self.goto_line(newline_offset)
1515 if not blank_finish:
1516 self.parent += self.unindent_warning('Field list')
1517 return [], next_state, []
1518
1519 def field(self, match):
1520 name = self.parse_field_marker(match)
1521 src, srcline = self.state_machine.get_source_and_line()
1522 lineno = self.state_machine.abs_line_number()
1523 (indented, indent, line_offset, blank_finish
1524 ) = self.state_machine.get_first_known_indented(match.end())
1525 field_node = nodes.field()
1526 field_node.source = src
1527 field_node.line = srcline
1528 name_nodes, name_messages = self.inline_text(name, lineno)
1529 field_node += nodes.field_name(name, '', *name_nodes)
1530 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1531 field_node += field_body
1532 if indented:
1533 self.parse_field_body(indented, line_offset, field_body)
1534 return field_node, blank_finish
1535
1536 def parse_field_marker(self, match):
1537 """Extract & return field name from a field marker match."""
1538 field = match.group()[1:] # strip off leading ':'
1539 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1540 return field
1541
1542 def parse_field_body(self, indented, offset, node) -> None:
1543 self.nested_parse(indented, input_offset=offset, node=node)
1544
1545 def option_marker(self, match, context, next_state):
1546 """Option list item."""
1547 optionlist = nodes.option_list()
1548 (optionlist.source, optionlist.line
1549 ) = self.state_machine.get_source_and_line()
1550 try:
1551 listitem, blank_finish = self.option_list_item(match)
1552 except MarkupError as error:
1553 # This shouldn't happen; pattern won't match.
1554 msg = self.reporter.error('Invalid option list marker: %s'
1555 % error)
1556 self.parent += msg
1557 (indented, indent, line_offset, blank_finish
1558 ) = self.state_machine.get_first_known_indented(match.end())
1559 elements = self.block_quote(indented, line_offset)
1560 self.parent += elements
1561 if not blank_finish:
1562 self.parent += self.unindent_warning('Option list')
1563 return [], next_state, []
1564 self.parent += optionlist
1565 optionlist += listitem
1566 offset = self.state_machine.line_offset + 1 # next line
1567 newline_offset, blank_finish = self.nested_list_parse(
1568 self.state_machine.input_lines[offset:],
1569 input_offset=self.state_machine.abs_line_offset() + 1,
1570 node=optionlist, initial_state='OptionList',
1571 blank_finish=blank_finish)
1572 self.goto_line(newline_offset)
1573 if not blank_finish:
1574 self.parent += self.unindent_warning('Option list')
1575 return [], next_state, []
1576
1577 def option_list_item(self, match):
1578 offset = self.state_machine.abs_line_offset()
1579 options = self.parse_option_marker(match)
1580 (indented, indent, line_offset, blank_finish
1581 ) = self.state_machine.get_first_known_indented(match.end())
1582 if not indented: # not an option list item
1583 self.goto_line(offset)
1584 raise statemachine.TransitionCorrection('text')
1585 option_group = nodes.option_group('', *options)
1586 description = nodes.description('\n'.join(indented))
1587 option_list_item = nodes.option_list_item('', option_group,
1588 description)
1589 if indented:
1590 self.nested_parse(indented, input_offset=line_offset,
1591 node=description)
1592 return option_list_item, blank_finish
1593
1594 def parse_option_marker(self, match):
1595 """
1596        Return a list of `nodes.option` elements (each an option string
1597        plus an optional argument), parsed from an option marker match.
1598
1599 :Exception: `MarkupError` for invalid option markers.
1600 """
1601 optlist = []
1602 # split at ", ", except inside < > (complex arguments)
1603 optionstrings = re.split(r', (?![^<]*>)', match.group().rstrip())
1604 for optionstring in optionstrings:
1605 tokens = optionstring.split()
1606 delimiter = ' '
1607 firstopt = tokens[0].split('=', 1)
1608 if len(firstopt) > 1:
1609 # "--opt=value" form
1610 tokens[:1] = firstopt
1611 delimiter = '='
1612 elif (len(tokens[0]) > 2
1613 and ((tokens[0].startswith('-')
1614 and not tokens[0].startswith('--'))
1615 or tokens[0].startswith('+'))):
1616 # "-ovalue" form
1617 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1618 delimiter = ''
1619 if len(tokens) > 1 and (tokens[1].startswith('<')
1620 and tokens[-1].endswith('>')):
1621 # "-o <value1 value2>" form; join all values into one token
1622 tokens[1:] = [' '.join(tokens[1:])]
1623 if 0 < len(tokens) <= 2:
1624 option = nodes.option(optionstring)
1625 option += nodes.option_string(tokens[0], tokens[0])
1626 if len(tokens) > 1:
1627 option += nodes.option_argument(tokens[1], tokens[1],
1628 delimiter=delimiter)
1629 optlist.append(option)
1630 else:
1631 raise MarkupError(
1632 'wrong number of option tokens (=%s), should be 1 or 2: '
1633 '"%s"' % (len(tokens), optionstring))
1634 return optlist
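    # Option-marker forms recognized above (illustrative, not in the
    # original source):
    #
    #     -a            short option
    #     -b file       short option with an argument
    #     --long        long option
    #     --input=FILE  long option with "=" delimiter
    #     -x, -y, -z    several synonymous options, separated by ", "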
1635
1636 def doctest(self, match, context, next_state):
1637 line = self.document.current_line
1638 data = '\n'.join(self.state_machine.get_text_block())
1639 # TODO: Parse with `directives.body.CodeBlock` with
1640 # argument 'pycon' (Python Console) in Docutils 1.0.
1641 n = nodes.doctest_block(data, data)
1642 n.line = line
1643 self.parent += n
1644 return [], next_state, []
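    # A doctest block is an unindented, blank-line-delimited block starting
    # with ">>> " (illustrative example, not in the original source):
    #
    #     >>> print('Hello, world!')
    #     Hello, world!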
1645
1646 def line_block(self, match, context, next_state):
1647 """First line of a line block."""
1648 block = nodes.line_block()
1649 self.parent += block
1650 lineno = self.state_machine.abs_line_number()
1651 (block.source,
1652 block.line) = self.state_machine.get_source_and_line(lineno)
1653 line, messages, blank_finish = self.line_block_line(match, lineno)
1654 block += line
1655 self.parent += messages
1656 if not blank_finish:
1657 offset = self.state_machine.line_offset + 1 # next line
1658 new_line_offset, blank_finish = self.nested_list_parse(
1659 self.state_machine.input_lines[offset:],
1660 input_offset=self.state_machine.abs_line_offset() + 1,
1661 node=block, initial_state='LineBlock',
1662 blank_finish=False)
1663 self.goto_line(new_line_offset)
1664 if not blank_finish:
1665 self.parent += self.reporter.warning(
1666 'Line block ends without a blank line.',
1667 line=lineno+1)
1668 if len(block):
1669 if block[0].indent is None:
1670 block[0].indent = 0
1671 self.nest_line_block_lines(block)
1672 return [], next_state, []
1673
1674 def line_block_line(self, match, lineno):
1675 """Return one line element of a line_block."""
1676 (indented, indent, line_offset, blank_finish
1677 ) = self.state_machine.get_first_known_indented(match.end(),
1678 until_blank=True)
1679 text = '\n'.join(indented)
1680 text_nodes, messages = self.inline_text(text, lineno)
1681 line = nodes.line(text, '', *text_nodes)
1682 (line.source,
1683 line.line) = self.state_machine.get_source_and_line(lineno)
1684 if match.string.rstrip() != '|': # not empty
1685 line.indent = len(match.group(1)) - 1
1686 return line, messages, blank_finish
1687
1688 def nest_line_block_lines(self, block) -> None:
1689 for index in range(1, len(block)):
1690 if block[index].indent is None:
1691 block[index].indent = block[index - 1].indent
1692 self.nest_line_block_segment(block)
1693
1694 def nest_line_block_segment(self, block) -> None:
1695 indents = [item.indent for item in block]
1696 least = min(indents)
1697 new_items = []
1698 new_block = nodes.line_block()
1699 for item in block:
1700 if item.indent > least:
1701 new_block.append(item)
1702 else:
1703 if len(new_block):
1704 self.nest_line_block_segment(new_block)
1705 new_items.append(new_block)
1706 new_block = nodes.line_block()
1707 new_items.append(item)
1708 if len(new_block):
1709 self.nest_line_block_segment(new_block)
1710 new_items.append(new_block)
1711 block[:] = new_items
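    # Line-block source and the effect of indentation (illustrative example,
    # not in the original source); deeper-indented lines become nested
    # line_block elements:
    #
    #     | Lend us a couple of bob till Thursday.
    #     |   I'm absolutely skint.
    #     | But I'm expecting a postal order.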
1712
1713 def grid_table_top(self, match, context, next_state):
1714 """Top border of a full table."""
1715 return self.table_top(match, context, next_state,
1716 self.isolate_grid_table,
1717 tableparser.GridTableParser)
1718
1719 def simple_table_top(self, match, context, next_state):
1720 """Top border of a simple table."""
1721 return self.table_top(match, context, next_state,
1722 self.isolate_simple_table,
1723 tableparser.SimpleTableParser)
1724
1725 def table_top(self, match, context, next_state,
1726 isolate_function, parser_class):
1727 """Top border of a generic table."""
1728 nodelist, blank_finish = self.table(isolate_function, parser_class)
1729 self.parent += nodelist
1730 if not blank_finish:
1731 msg = self.reporter.warning(
1732 'Blank line required after table.',
1733 line=self.state_machine.abs_line_number()+1)
1734 self.parent += msg
1735 return [], next_state, []
1736
1737 def table(self, isolate_function, parser_class):
1738 """Parse a table."""
1739 block, messages, blank_finish = isolate_function()
1740 if block:
1741 try:
1742 parser = parser_class()
1743 tabledata = parser.parse(block)
1744 tableline = (self.state_machine.abs_line_number() - len(block)
1745 + 1)
1746 table = self.build_table(tabledata, tableline)
1747 nodelist = [table] + messages
1748 except tableparser.TableMarkupError as err:
1749 nodelist = self.malformed_table(block, ' '.join(err.args),
1750 offset=err.offset) + messages
1751 else:
1752 nodelist = messages
1753 return nodelist, blank_finish
1754
1755 def isolate_grid_table(self):
1756 messages = []
1757 blank_finish = True
1758 try:
1759 block = self.state_machine.get_text_block(flush_left=True)
1760 except statemachine.UnexpectedIndentationError as err:
1761 block, src, srcline = err.args
1762 messages.append(self.reporter.error('Unexpected indentation.',
1763 source=src, line=srcline))
1764 blank_finish = False
1765 block.disconnect()
1766 # for East Asian chars:
1767 block.pad_double_width(self.double_width_pad_char)
1768 width = len(block[0].strip())
1769 for i in range(len(block)):
1770 block[i] = block[i].strip()
1771 if block[i][0] not in '+|': # check left edge
1772 blank_finish = False
1773 self.state_machine.previous_line(len(block) - i)
1774 del block[i:]
1775 break
1776 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1777 # from second-last to third line of table:
1778 for i in range(len(block) - 2, 1, -1):
1779 if self.grid_table_top_pat.match(block[i]):
1780 self.state_machine.previous_line(len(block) - i + 1)
1781 del block[i+1:]
1782 blank_finish = False
1783 break
1784 else:
1785 detail = 'Bottom border missing or corrupt.'
1786 messages.extend(self.malformed_table(block, detail, i))
1787 return [], messages, blank_finish
1788 for i in range(len(block)): # check right edge
1789 if len(block[i]) != width or block[i][-1] not in '+|':
1790 detail = 'Right border not aligned or missing.'
1791 messages.extend(self.malformed_table(block, detail, i))
1792 return [], messages, blank_finish
1793 return block, messages, blank_finish
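    # A minimal grid table as isolated above (illustrative, not in the
    # original source):
    #
    #     +------------+------------+
    #     | Header 1   | Header 2   |
    #     +============+============+
    #     | body row 1 | body row 2 |
    #     +------------+------------+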
1794
1795 def isolate_simple_table(self):
1796 start = self.state_machine.line_offset
1797 lines = self.state_machine.input_lines
1798 limit = len(lines) - 1
1799 toplen = len(lines[start].strip())
1800 pattern_match = self.simple_table_border_pat.match
1801 found = 0
1802 found_at = None
1803 i = start + 1
1804 while i <= limit:
1805 line = lines[i]
1806 match = pattern_match(line)
1807 if match:
1808 if len(line.strip()) != toplen:
1809 self.state_machine.next_line(i - start)
1810 messages = self.malformed_table(
1811 lines[start:i+1], 'Bottom border or header rule does '
1812 'not match top border.', i-start)
1813 return [], messages, i == limit or not lines[i+1].strip()
1814 found += 1
1815 found_at = i
1816 if found == 2 or i == limit or not lines[i+1].strip():
1817 end = i
1818 break
1819 i += 1
1820 else: # reached end of input_lines
1821 details = 'No bottom table border found'
1822 if found:
1823 details += ' or no blank line after table bottom'
1824 self.state_machine.next_line(found_at - start)
1825 block = lines[start:found_at+1]
1826 else:
1827 self.state_machine.next_line(i - start - 1)
1828 block = lines[start:]
1829 messages = self.malformed_table(block, details + '.')
1830 return [], messages, not found
1831 self.state_machine.next_line(end - start)
1832 block = lines[start:end+1]
1833 # for East Asian chars:
1834 block.pad_double_width(self.double_width_pad_char)
1835 return block, [], end == limit or not lines[end+1].strip()
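    # A minimal simple table as isolated above (illustrative, not in the
    # original source); the '=' rules mark the borders and the header:
    #
    #     =====  =====
    #     col 1  col 2
    #     =====  =====
    #     1      2
    #     =====  =====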
1836
1837 def malformed_table(self, block, detail='', offset=0):
1838 block.replace(self.double_width_pad_char, '')
1839 data = '\n'.join(block)
1840 message = 'Malformed table.'
1841 startline = self.state_machine.abs_line_number() - len(block) + 1
1842 if detail:
1843 message += '\n' + detail
1844 error = self.reporter.error(message, nodes.literal_block(data, data),
1845 line=startline+offset)
1846 return [error]
1847
1848 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
1849 colwidths, headrows, bodyrows = tabledata
1850 table = nodes.table()
1851 if widths == 'auto':
1852 table['classes'] += ['colwidths-auto']
1853 elif widths: # "grid" or list of integers
1854 table['classes'] += ['colwidths-given']
1855 tgroup = nodes.tgroup(cols=len(colwidths))
1856 table += tgroup
1857 for colwidth in colwidths:
1858 colspec = nodes.colspec(colwidth=colwidth)
1859 if stub_columns:
1860 colspec.attributes['stub'] = True
1861 stub_columns -= 1
1862 tgroup += colspec
1863 if headrows:
1864 thead = nodes.thead()
1865 tgroup += thead
1866 for row in headrows:
1867 thead += self.build_table_row(row, tableline)
1868 tbody = nodes.tbody()
1869 tgroup += tbody
1870 for row in bodyrows:
1871 tbody += self.build_table_row(row, tableline)
1872 return table
1873
1874 def build_table_row(self, rowdata, tableline):
1875 row = nodes.row()
1876 for cell in rowdata:
1877 if cell is None:
1878 continue
1879 morerows, morecols, offset, cellblock = cell
1880 attributes = {}
1881 if morerows:
1882 attributes['morerows'] = morerows
1883 if morecols:
1884 attributes['morecols'] = morecols
1885 entry = nodes.entry(**attributes)
1886 row += entry
1887 if ''.join(cellblock):
1888 self.nested_parse(cellblock, input_offset=tableline+offset,
1889 node=entry)
1890 return row
1891
1892 explicit = Struct()
1893 """Patterns and constants used for explicit markup recognition."""
1894
1895 explicit.patterns = Struct(
1896 target=re.compile(r"""
1897 (
1898 _ # anonymous target
1899 | # *OR*
1900 (?!_) # no underscore at the beginning
1901 (?P<quote>`?) # optional open quote
1902 (?![ `]) # first char. not space or
1903 # backquote
1904 (?P<name> # reference name
1905 .+?
1906 )
1907 %(non_whitespace_escape_before)s
1908 (?P=quote) # close quote if open quote used
1909 )
1910 (?<!(?<!\x00):) # no unescaped colon at end
1911 %(non_whitespace_escape_before)s
1912 [ ]? # optional space
1913 : # end of reference name
1914 ([ ]+|$) # followed by whitespace
1915 """ % vars(Inliner), re.VERBOSE),
1916 reference=re.compile(r"""
1917 (
1918 (?P<simple>%(simplename)s)_
1919 | # *OR*
1920 ` # open backquote
1921 (?![ ]) # not space
1922 (?P<phrase>.+?) # hyperlink phrase
1923 %(non_whitespace_escape_before)s
1924 `_ # close backquote,
1925 # reference mark
1926 )
1927 $ # end of string
1928 """ % vars(Inliner), re.VERBOSE),
1929 substitution=re.compile(r"""
1930 (
1931 (?![ ]) # first char. not space
1932 (?P<name>.+?) # substitution text
1933 %(non_whitespace_escape_before)s
1934 \| # close delimiter
1935 )
1936 ([ ]+|$) # followed by whitespace
1937 """ % vars(Inliner),
1938 re.VERBOSE),)
1939
1940 def footnote(self, match):
1941 src, srcline = self.state_machine.get_source_and_line()
1942 (indented, indent, offset, blank_finish
1943 ) = self.state_machine.get_first_known_indented(match.end())
1944 label = match.group(1)
1945 name = normalize_name(label)
1946 footnote = nodes.footnote('\n'.join(indented))
1947 footnote.source = src
1948 footnote.line = srcline
1949 if name[0] == '#': # auto-numbered
1950 name = name[1:] # autonumber label
1951 footnote['auto'] = 1
1952 if name:
1953 footnote['names'].append(name)
1954 self.document.note_autofootnote(footnote)
1955 elif name == '*': # auto-symbol
1956 name = ''
1957 footnote['auto'] = '*'
1958 self.document.note_symbol_footnote(footnote)
1959 else: # manually numbered
1960 footnote += nodes.label('', label)
1961 footnote['names'].append(name)
1962 self.document.note_footnote(footnote)
1963 if name:
1964 self.document.note_explicit_target(footnote, footnote)
1965 else:
1966 self.document.set_id(footnote, footnote)
1967 if indented:
1968 self.nested_parse(indented, input_offset=offset, node=footnote)
1969 else:
1970 footnote += self.reporter.warning('Footnote content expected.')
1971 return [footnote], blank_finish
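    # Footnote label forms handled above (illustrative, not in the original
    # source):
    #
    #     .. [1] Manually numbered footnote.
    #     .. [#] Auto-numbered footnote.
    #     .. [#label] Auto-numbered footnote with a label.
    #     .. [*] Auto-symbol footnote.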
1972
1973 def citation(self, match):
1974 src, srcline = self.state_machine.get_source_and_line()
1975 (indented, indent, offset, blank_finish
1976 ) = self.state_machine.get_first_known_indented(match.end())
1977 label = match.group(1)
1978 name = normalize_name(label)
1979 citation = nodes.citation('\n'.join(indented))
1980 citation.source = src
1981 citation.line = srcline
1982 citation += nodes.label('', label)
1983 citation['names'].append(name)
1984 self.document.note_citation(citation)
1985 self.document.note_explicit_target(citation, citation)
1986 if indented:
1987 self.nested_parse(indented, input_offset=offset, node=citation)
1988 else:
1989 citation += self.reporter.warning('Citation content expected.')
1990 return [citation], blank_finish
1991
1992 def hyperlink_target(self, match):
1993 pattern = self.explicit.patterns.target
1994 lineno = self.state_machine.abs_line_number()
1995 (block, indent, offset, blank_finish
1996 ) = self.state_machine.get_first_known_indented(
1997 match.end(), until_blank=True, strip_indent=False)
1998 blocktext = match.string[:match.end()] + '\n'.join(block)
1999 block = [escape2null(line) for line in block]
2000 escaped = block[0]
2001 blockindex = 0
2002 while True:
2003 targetmatch = pattern.match(escaped)
2004 if targetmatch:
2005 break
2006 blockindex += 1
2007 try:
2008 escaped += block[blockindex]
2009 except IndexError:
2010 raise MarkupError('malformed hyperlink target.')
2011 del block[:blockindex]
2012 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
2013 target = self.make_target(block, blocktext, lineno,
2014 targetmatch.group('name'))
2015 return [target], blank_finish
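    # Hyperlink-target forms handled above (illustrative, not in the
    # original source):
    #
    #     .. _Docutils: https://docutils.sourceforge.io/
    #     .. _internal target:
    #     .. _indirect target: Docutils_
    #     .. __: https://example.org/
    #
    # The last form is an anonymous target; the short form
    # "__ https://example.org/" is handled by the 'anonymous' transition.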
2016
2017 def make_target(self, block, block_text, lineno, target_name):
2018 target_type, data = self.parse_target(block, block_text, lineno)
2019 if target_type == 'refname':
2020 target = nodes.target(block_text, '', refname=normalize_name(data))
2021 target.indirect_reference_name = data
2022 self.add_target(target_name, '', target, lineno)
2023 self.document.note_indirect_target(target)
2024 return target
2025 elif target_type == 'refuri':
2026 target = nodes.target(block_text, '')
2027 self.add_target(target_name, data, target, lineno)
2028 return target
2029 else:
2030 return data
2031
2032 def parse_target(self, block, block_text, lineno):
2033 """
2034 Determine the type of reference of a target.
2035
2036 :Return: A 2-tuple, one of:
2037
2038 - 'refname' and the indirect reference name
2039 - 'refuri' and the URI
2040 - 'malformed' and a system_message node
2041 """
2042 if block and block[-1].strip()[-1:] == '_': # possible indirect target
2043 reference = ' '.join(line.strip() for line in block)
2044 refname = self.is_reference(reference)
2045 if refname:
2046 return 'refname', refname
2047 ref_parts = split_escaped_whitespace(' '.join(block))
2048 reference = ' '.join(''.join(unescape(part).split())
2049 for part in ref_parts)
2050 return 'refuri', reference
2051
2052 def is_reference(self, reference):
2053 match = self.explicit.patterns.reference.match(
2054 whitespace_normalize_name(reference))
2055 if not match:
2056 return None
2057 return unescape(match.group('simple') or match.group('phrase'))
2058
2059 def add_target(self, targetname, refuri, target, lineno):
2060 target.line = lineno
2061 if targetname:
2062 name = normalize_name(unescape(targetname))
2063 target['names'].append(name)
2064 if refuri:
2065 uri = self.inliner.adjust_uri(refuri)
2066 if uri:
2067 target['refuri'] = uri
2068 else:
2069 raise ApplicationError('problem with URI: %r' % refuri)
2070 self.document.note_explicit_target(target, self.parent)
2071 else: # anonymous target
2072 if refuri:
2073 target['refuri'] = refuri
2074 target['anonymous'] = True
2075 self.document.note_anonymous_target(target)
2076
2077 def substitution_def(self, match):
2078 pattern = self.explicit.patterns.substitution
2079 src, srcline = self.state_machine.get_source_and_line()
2080 (block, indent, offset, blank_finish
2081 ) = self.state_machine.get_first_known_indented(match.end(),
2082 strip_indent=False)
2083 blocktext = (match.string[:match.end()] + '\n'.join(block))
2084 block.disconnect()
2085 escaped = escape2null(block[0].rstrip())
2086 blockindex = 0
2087 while True:
2088 subdefmatch = pattern.match(escaped)
2089 if subdefmatch:
2090 break
2091 blockindex += 1
2092 try:
2093 escaped = escaped + ' ' + escape2null(
2094 block[blockindex].strip())
2095 except IndexError:
2096 raise MarkupError('malformed substitution definition.')
2097 del block[:blockindex] # strip out the substitution marker
2098 start = subdefmatch.end()-len(escaped)-1
2099 block[0] = (block[0].strip() + ' ')[start:-1]
2100 if not block[0]:
2101 del block[0]
2102 offset += 1
2103 while block and not block[-1].strip():
2104 block.pop()
2105 subname = subdefmatch.group('name')
2106 substitution_node = nodes.substitution_definition(blocktext)
2107 substitution_node.source = src
2108 substitution_node.line = srcline
2109 if not block:
2110 msg = self.reporter.warning(
2111 'Substitution definition "%s" missing contents.' % subname,
2112 nodes.literal_block(blocktext, blocktext),
2113 source=src, line=srcline)
2114 return [msg], blank_finish
2115 block[0] = block[0].strip()
2116 substitution_node['names'].append(
2117 nodes.whitespace_normalize_name(subname))
2118 new_abs_offset, blank_finish = self.nested_list_parse(
2119 block, input_offset=offset, node=substitution_node,
2120 initial_state='SubstitutionDef', blank_finish=blank_finish)
2121 i = 0
2122 for node in substitution_node[:]:
2123 if not (isinstance(node, nodes.Inline)
2124 or isinstance(node, nodes.Text)):
2125 self.parent += substitution_node[i]
2126 del substitution_node[i]
2127 else:
2128 i += 1
2129 for node in substitution_node.findall(nodes.Element):
2130 if self.disallowed_inside_substitution_definitions(node):
2131 pformat = nodes.literal_block('', node.pformat().rstrip())
2132 msg = self.reporter.error(
2133 'Substitution definition contains illegal element <%s>:'
2134 % node.tagname,
2135 pformat, nodes.literal_block(blocktext, blocktext),
2136 source=src, line=srcline)
2137 return [msg], blank_finish
2138 if len(substitution_node) == 0:
2139 msg = self.reporter.warning(
2140 'Substitution definition "%s" empty or invalid.' % subname,
2141 nodes.literal_block(blocktext, blocktext),
2142 source=src, line=srcline)
2143 return [msg], blank_finish
2144 self.document.note_substitution_def(
2145 substitution_node, subname, self.parent)
2146 return [substitution_node], blank_finish
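    # Substitution-definition forms handled above (illustrative, not in the
    # original source):
    #
    #     .. |reST| replace:: reStructuredText
    #     .. |logo| image:: logo.png
    #        :alt: project logo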
2147
2148 def disallowed_inside_substitution_definitions(self, node) -> bool:
2149 if (node['ids']
2150 or isinstance(node, nodes.reference) and node.get('anonymous')
2151 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2152 return True
2153 else:
2154 return False
2155
2156 def directive(self, match, **option_presets):
2157 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2158 type_name = match.group(1)
2159 directive_class, messages = directives.directive(
2160 type_name, self.memo.language, self.document)
2161 self.parent += messages
2162 if directive_class:
2163 return self.run_directive(
2164 directive_class, match, type_name, option_presets)
2165 else:
2166 return self.unknown_directive(type_name)
2167
2168 def run_directive(self, directive, match, type_name, option_presets):
2169 """
2170 Parse a directive then run its directive function.
2171
2172 Parameters:
2173
2174 - `directive`: The class implementing the directive. Must be
2175 a subclass of `rst.Directive`.
2176
2177 - `match`: A regular expression match object which matched the first
2178 line of the directive.
2179
2180 - `type_name`: The directive name, as used in the source text.
2181
2182 - `option_presets`: A dictionary of preset options, defaults for the
2183 directive options. Currently, only an "alt" option is passed by
2184 substitution definitions (value: the substitution name), which may
2185 be used by an embedded image directive.
2186
2187 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2188 """
2189 if isinstance(directive, (FunctionType, MethodType)):
2190 from docutils.parsers.rst import convert_directive_function
2191 directive = convert_directive_function(directive)
2192 lineno = self.state_machine.abs_line_number()
2193 initial_line_offset = self.state_machine.line_offset
2194 (indented, indent, line_offset, blank_finish
2195 ) = self.state_machine.get_first_known_indented(match.end(),
2196 strip_top=0)
2197 block_text = '\n'.join(self.state_machine.input_lines[
2198 initial_line_offset : self.state_machine.line_offset + 1]) # noqa: E203,E501
2199 try:
2200 arguments, options, content, content_offset = (
2201 self.parse_directive_block(indented, line_offset,
2202 directive, option_presets))
2203 except MarkupError as detail:
2204 error = self.reporter.error(
2205 'Error in "%s" directive:\n%s.' % (type_name,
2206 ' '.join(detail.args)),
2207 nodes.literal_block(block_text, block_text), line=lineno)
2208 return [error], blank_finish
2209 directive_instance = directive(
2210 type_name, arguments, options, content, lineno,
2211 content_offset, block_text, self, self.state_machine)
2212 try:
2213 result = directive_instance.run()
2214 except docutils.parsers.rst.DirectiveError as error:
2215 msg_node = self.reporter.system_message(error.level, error.msg,
2216 line=lineno)
2217 msg_node += nodes.literal_block(block_text, block_text)
2218 result = [msg_node]
2219 assert isinstance(result, list), \
2220 'Directive "%s" must return a list of nodes.' % type_name
2221 for i in range(len(result)):
2222 assert isinstance(result[i], nodes.Node), \
2223 ('Directive "%s" returned non-Node object (index %s): %r'
2224 % (type_name, i, result[i]))
2225 return (result,
2226 blank_finish or self.state_machine.is_next_line_blank())
2227
2228 def parse_directive_block(self, indented, line_offset, directive,
2229 option_presets):
2230 option_spec = directive.option_spec
2231 has_content = directive.has_content
2232 if indented and not indented[0].strip():
2233 indented.trim_start()
2234 line_offset += 1
2235 while indented and not indented[-1].strip():
2236 indented.trim_end()
2237 if indented and (directive.required_arguments
2238 or directive.optional_arguments
2239 or option_spec):
2240 for i, line in enumerate(indented):
2241 if not line.strip():
2242 break
2243 else:
2244 i += 1
2245 arg_block = indented[:i]
2246 content = indented[i+1:]
2247 content_offset = line_offset + i + 1
2248 else:
2249 content = indented
2250 content_offset = line_offset
2251 arg_block = []
2252 if option_spec:
2253 options, arg_block = self.parse_directive_options(
2254 option_presets, option_spec, arg_block)
2255 else:
2256 options = {}
2257 if arg_block and not (directive.required_arguments
2258 or directive.optional_arguments):
2259 content = arg_block + indented[i:]
2260 content_offset = line_offset
2261 arg_block = []
2262 while content and not content[0].strip():
2263 content.trim_start()
2264 content_offset += 1
2265 if directive.required_arguments or directive.optional_arguments:
2266 arguments = self.parse_directive_arguments(
2267 directive, arg_block)
2268 else:
2269 arguments = []
2270 if content and not has_content:
2271 raise MarkupError('no content permitted')
2272 return arguments, options, content, content_offset
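    # Anatomy of a directive block as split above (illustrative, not in the
    # original source):
    #
    #     .. admonition:: And, by the way...    <- name and argument(s)
    #        :class: note                       <- option field list
    #
    #        Directive content starts after the first blank line.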
2273
2274 def parse_directive_options(self, option_presets, option_spec, arg_block):
2275 options = option_presets.copy()
2276 for i, line in enumerate(arg_block):
2277 if re.match(Body.patterns['field_marker'], line):
2278 opt_block = arg_block[i:]
2279 arg_block = arg_block[:i]
2280 break
2281 else:
2282 opt_block = []
2283 if opt_block:
2284 success, data = self.parse_extension_options(option_spec,
2285 opt_block)
2286 if success: # data is a dict of options
2287 options.update(data)
2288 else: # data is an error string
2289 raise MarkupError(data)
2290 return options, arg_block
2291
2292 def parse_directive_arguments(self, directive, arg_block):
2293 required = directive.required_arguments
2294 optional = directive.optional_arguments
2295 arg_text = '\n'.join(arg_block)
2296 arguments = arg_text.split()
2297 if len(arguments) < required:
2298 raise MarkupError('%s argument(s) required, %s supplied'
2299 % (required, len(arguments)))
2300 elif len(arguments) > required + optional:
2301 if directive.final_argument_whitespace:
2302 arguments = arg_text.split(None, required + optional - 1)
2303 else:
2304 raise MarkupError(
2305 'maximum %s argument(s) allowed, %s supplied'
2306 % (required + optional, len(arguments)))
2307 return arguments
2308
2309 def parse_extension_options(self, option_spec, datalines):
2310 """
2311 Parse `datalines` for a field list containing extension options
2312 matching `option_spec`.
2313
2314 :Parameters:
2315 - `option_spec`: a mapping of option name to conversion
2316 function, which should raise an exception on bad input.
2317 - `datalines`: a list of input strings.
2318
2319 :Return:
2320 - Success value, 1 or 0.
2321 - An option dictionary on success, an error string on failure.
2322 """
2323 node = nodes.field_list()
2324 newline_offset, blank_finish = self.nested_list_parse(
2325 datalines, 0, node, initial_state='ExtensionOptions',
2326 blank_finish=True)
2327 if newline_offset != len(datalines): # incomplete parse of block
2328 return 0, 'invalid option block'
2329 try:
2330 options = utils.extract_extension_options(node, option_spec)
2331 except KeyError as detail:
2332 return 0, 'unknown option: "%s"' % detail.args[0]
2333 except (ValueError, TypeError) as detail:
2334 return 0, 'invalid option value: %s' % ' '.join(detail.args)
2335 except utils.ExtensionOptionError as detail:
2336 return 0, 'invalid option data: %s' % ' '.join(detail.args)
2337 if blank_finish:
2338 return 1, options
2339 else:
2340 return 0, 'option data incompletely parsed'
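    # Minimal usage sketch (hypothetical values, for illustration only;
    # assumes ``state`` is a live `Body` instance):
    #
    #     from docutils.parsers.rst import directives
    #     option_spec = {'width': directives.positive_int}
    #     success, data = state.parse_extension_options(option_spec,
    #                                                    [':width: 120'])
    #     # success == 1, data == {'width': 120}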
2341
2342 def unknown_directive(self, type_name):
2343 lineno = self.state_machine.abs_line_number()
2344 (indented, indent, offset, blank_finish
2345 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2346 text = '\n'.join(indented)
2347 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2348 nodes.literal_block(text, text),
2349 line=lineno)
2350 return [error], blank_finish
2351
2352 def comment(self, match):
2353 if self.state_machine.is_next_line_blank():
2354 first_comment_line = match.string[match.end():]
2355 if not first_comment_line.strip(): # empty comment
2356 return [nodes.comment()], True # "A tiny but practical wart."
2357 if first_comment_line.startswith('end of inclusion from "'):
2358 # cf. parsers.rst.directives.misc.Include
2359 self.document.include_log.pop()
2360 return [], True
2361 (indented, indent, offset, blank_finish
2362 ) = self.state_machine.get_first_known_indented(match.end())
2363 while indented and not indented[-1].strip():
2364 indented.trim_end()
2365 text = '\n'.join(indented)
2366 return [nodes.comment(text, text)], blank_finish
2367
2368 explicit.constructs = [
2369 (footnote,
2370 re.compile(r"""
2371 \.\.[ ]+ # explicit markup start
2372 \[
2373 ( # footnote label:
2374 [0-9]+ # manually numbered footnote
2375 | # *OR*
2376 \# # anonymous auto-numbered footnote
2377 | # *OR*
2378                          \#%s                 # auto-numbered footnote with label
2379 | # *OR*
2380 \* # auto-symbol footnote
2381 )
2382 \]
2383 ([ ]+|$) # whitespace or end of line
2384 """ % Inliner.simplename, re.VERBOSE)),
2385 (citation,
2386 re.compile(r"""
2387 \.\.[ ]+ # explicit markup start
2388 \[(%s)\] # citation label
2389 ([ ]+|$) # whitespace or end of line
2390 """ % Inliner.simplename, re.VERBOSE)),
2391 (hyperlink_target,
2392 re.compile(r"""
2393 \.\.[ ]+ # explicit markup start
2394 _ # target indicator
2395 (?![ ]|$) # first char. not space or EOL
2396 """, re.VERBOSE)),
2397 (substitution_def,
2398 re.compile(r"""
2399 \.\.[ ]+ # explicit markup start
2400 \| # substitution indicator
2401 (?![ ]|$) # first char. not space or EOL
2402 """, re.VERBOSE)),
2403 (directive,
2404 re.compile(r"""
2405 \.\.[ ]+ # explicit markup start
2406 (%s) # directive name
2407 [ ]? # optional space
2408 :: # directive delimiter
2409 ([ ]+|$) # whitespace or end of line
2410 """ % Inliner.simplename, re.VERBOSE))]
2411
2412 def explicit_markup(self, match, context, next_state):
2413 """Footnotes, hyperlink targets, directives, comments."""
2414 nodelist, blank_finish = self.explicit_construct(match)
2415 self.parent += nodelist
2416 self.explicit_list(blank_finish)
2417 return [], next_state, []
2418
2419 def explicit_construct(self, match):
2420 """Determine which explicit construct this is, parse & return it."""
2421 errors = []
2422 for method, pattern in self.explicit.constructs:
2423 expmatch = pattern.match(match.string)
2424 if expmatch:
2425 try:
2426 return method(self, expmatch)
2427 except MarkupError as error:
2428 lineno = self.state_machine.abs_line_number()
2429 message = ' '.join(error.args)
2430 errors.append(self.reporter.warning(message, line=lineno))
2431 break
2432 nodelist, blank_finish = self.comment(match)
2433 return nodelist + errors, blank_finish
2434
2435 def explicit_list(self, blank_finish) -> None:
2436 """
2437 Create a nested state machine for a series of explicit markup
2438 constructs (including anonymous hyperlink targets).
2439 """
2440 offset = self.state_machine.line_offset + 1 # next line
2441 newline_offset, blank_finish = self.nested_list_parse(
2442 self.state_machine.input_lines[offset:],
2443 input_offset=self.state_machine.abs_line_offset() + 1,
2444 node=self.parent, initial_state='Explicit',
2445 blank_finish=blank_finish)
2446 self.goto_line(newline_offset)
2447 if not blank_finish:
2448 self.parent += self.unindent_warning('Explicit markup')
2449
2450 def anonymous(self, match, context, next_state):
2451 """Anonymous hyperlink targets."""
2452 nodelist, blank_finish = self.anonymous_target(match)
2453 self.parent += nodelist
2454 self.explicit_list(blank_finish)
2455 return [], next_state, []
2456
2457 def anonymous_target(self, match):
2458 lineno = self.state_machine.abs_line_number()
2459 (block, indent, offset, blank_finish
2460 ) = self.state_machine.get_first_known_indented(match.end(),
2461 until_blank=True)
2462 blocktext = match.string[:match.end()] + '\n'.join(block)
2463 block = [escape2null(line) for line in block]
2464 target = self.make_target(block, blocktext, lineno, '')
2465 return [target], blank_finish
2466
2467 def line(self, match, context, next_state):
2468 """Section title overline or transition marker."""
2469 if self.state_machine.match_titles:
2470 return [match.string], 'Line', []
2471 elif match.string.strip() == '::':
2472 raise statemachine.TransitionCorrection('text')
2473 elif len(match.string.strip()) < 4:
2474 msg = self.reporter.info(
2475 'Unexpected possible title overline or transition.\n'
2476 "Treating it as ordinary text because it's so short.",
2477 line=self.state_machine.abs_line_number())
2478 self.parent += msg
2479 raise statemachine.TransitionCorrection('text')
2480 else:
2481 blocktext = self.state_machine.line
2482 msg = self.reporter.error(
2483 'Unexpected section title or transition.',
2484 nodes.literal_block(blocktext, blocktext),
2485 line=self.state_machine.abs_line_number())
2486 self.parent += msg
2487 return [], next_state, []
2488
2489 def text(self, match, context, next_state):
2490 """Titles, definition lists, paragraphs."""
2491 return [match.string], 'Text', []
2492
2493
2494class RFC2822Body(Body):
2495
2496 """
2497 RFC2822 headers are only valid as the first constructs in documents. As
2498 soon as anything else appears, the `Body` state should take over.
2499 """
2500
2501 patterns = Body.patterns.copy() # can't modify the original
2502 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2503 initial_transitions = [(name, 'Body')
2504 for name in Body.initial_transitions]
2505 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2506
2507 def rfc2822(self, match, context, next_state):
2508 """RFC2822-style field list item."""
2509 fieldlist = nodes.field_list(classes=['rfc2822'])
2510 self.parent += fieldlist
2511 field, blank_finish = self.rfc2822_field(match)
2512 fieldlist += field
2513 offset = self.state_machine.line_offset + 1 # next line
2514 newline_offset, blank_finish = self.nested_list_parse(
2515 self.state_machine.input_lines[offset:],
2516 input_offset=self.state_machine.abs_line_offset() + 1,
2517 node=fieldlist, initial_state='RFC2822List',
2518 blank_finish=blank_finish)
2519 self.goto_line(newline_offset)
2520 if not blank_finish:
2521 self.parent += self.unindent_warning(
2522 'RFC2822-style field list')
2523 return [], next_state, []
2524
2525 def rfc2822_field(self, match):
2526 name = match.string[:match.string.find(':')]
2527 (indented, indent, line_offset, blank_finish
2528 ) = self.state_machine.get_first_known_indented(match.end(),
2529 until_blank=True)
2530 fieldnode = nodes.field()
2531 fieldnode += nodes.field_name(name, name)
2532 fieldbody = nodes.field_body('\n'.join(indented))
2533 fieldnode += fieldbody
2534 if indented:
2535 self.nested_parse(indented, input_offset=line_offset,
2536 node=fieldbody)
2537 return fieldnode, blank_finish
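    # RFC2822-style headers are plain "Name: value" lines at the very start
    # of the document (illustrative example, not in the original source):
    #
    #     Author: David Goodger
    #     Contact: goodger@python.org
    #     Date: 2002-08-18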
2538
2539
2540class SpecializedBody(Body):
2541
2542 """
2543 Superclass for second and subsequent compound element members. Compound
2544 elements are lists and list-like constructs.
2545
2546 All transition methods are disabled (redefined as `invalid_input`).
2547 Override individual methods in subclasses to re-enable.
2548
2549 For example, once an initial bullet list item, say, is recognized, the
2550 `BulletList` subclass takes over, with a "bullet_list" node as its
2551 container. Upon encountering the initial bullet list item, `Body.bullet`
2552 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2553 starts up a nested parsing session with `BulletList` as the initial state.
2554 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2555 as only bullet list items are encountered, they are parsed and inserted
2556 into the container. The first construct which is *not* a bullet list item
2557 triggers the `invalid_input` method, which ends the nested parse and
2558 closes the container. `BulletList` needs to recognize input that is
2559 invalid in the context of a bullet list, which means everything *other
2560 than* bullet list items, so it inherits the transition list created in
2561 `Body`.
2562 """
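    # For example (illustrative, not in the original source), in
    #
    #     - first item     <- `Body.bullet` opens the list, then `BulletList`
    #     - second item    <- parsed by `BulletList.bullet`
    #
    #     A paragraph.     <- triggers `invalid_input`; the list is closed
    #
    # the paragraph ends the nested parse and control returns to `Body`.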
2563
2564 def invalid_input(self, match=None, context=None, next_state=None):
2565 """Not a compound element member. Abort this state machine."""
2566 self.state_machine.previous_line() # back up so parent SM can reassess
2567 raise EOFError
2568
2569 indent = invalid_input
2570 bullet = invalid_input
2571 enumerator = invalid_input
2572 field_marker = invalid_input
2573 option_marker = invalid_input
2574 doctest = invalid_input
2575 line_block = invalid_input
2576 grid_table_top = invalid_input
2577 simple_table_top = invalid_input
2578 explicit_markup = invalid_input
2579 anonymous = invalid_input
2580 line = invalid_input
2581 text = invalid_input
2582
2583
2584class BulletList(SpecializedBody):
2585
2586 """Second and subsequent bullet_list list_items."""
2587
2588 def bullet(self, match, context, next_state):
2589 """Bullet list item."""
2590 if match.string[0] != self.parent['bullet']:
2591 # different bullet: new list
2592 self.invalid_input()
2593 listitem, blank_finish = self.list_item(match.end())
2594 self.parent += listitem
2595 self.blank_finish = blank_finish
2596 return [], next_state, []
2597
2598
2599class DefinitionList(SpecializedBody):
2600
2601 """Second and subsequent definition_list_items."""
2602
2603 def text(self, match, context, next_state):
2604 """Definition lists."""
2605 return [match.string], 'Definition', []
2606
2607
2608class EnumeratedList(SpecializedBody):
2609
2610 """Second and subsequent enumerated_list list_items."""
2611
2612 def enumerator(self, match, context, next_state):
2613 """Enumerated list item."""
2614 format, sequence, text, ordinal = self.parse_enumerator(
2615 match, self.parent['enumtype'])
2616 if (format != self.format
2617 or (sequence != '#' and (sequence != self.parent['enumtype']
2618 or self.auto
2619 or ordinal != (self.lastordinal + 1)))
2620 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2621 # different enumeration: new list
2622 self.invalid_input()
2623 if sequence == '#':
2624 self.auto = 1
2625 listitem, blank_finish = self.list_item(match.end())
2626 self.parent += listitem
2627 self.blank_finish = blank_finish
2628 self.lastordinal = ordinal
2629 return [], next_state, []
2630
2631
2632class FieldList(SpecializedBody):
2633
2634 """Second and subsequent field_list fields."""
2635
2636 def field_marker(self, match, context, next_state):
2637 """Field list field."""
2638 field, blank_finish = self.field(match)
2639 self.parent += field
2640 self.blank_finish = blank_finish
2641 return [], next_state, []
2642
2643
2644class OptionList(SpecializedBody):
2645
2646 """Second and subsequent option_list option_list_items."""
2647
2648 def option_marker(self, match, context, next_state):
2649 """Option list item."""
2650 try:
2651 option_list_item, blank_finish = self.option_list_item(match)
2652 except MarkupError:
2653 self.invalid_input()
2654 self.parent += option_list_item
2655 self.blank_finish = blank_finish
2656 return [], next_state, []
2657
2658
2659class RFC2822List(SpecializedBody, RFC2822Body):
2660
2661 """Second and subsequent RFC2822-style field_list fields."""
2662
2663 patterns = RFC2822Body.patterns
2664 initial_transitions = RFC2822Body.initial_transitions
2665
2666 def rfc2822(self, match, context, next_state):
2667 """RFC2822-style field list item."""
2668 field, blank_finish = self.rfc2822_field(match)
2669 self.parent += field
2670 self.blank_finish = blank_finish
2671 return [], 'RFC2822List', []
2672
2673 blank = SpecializedBody.invalid_input
2674
2675
2676class ExtensionOptions(FieldList):
2677
2678 """
2679 Parse field_list fields for extension options.
2680
2681 No nested parsing is done (including inline markup parsing).
2682 """
2683
2684 def parse_field_body(self, indented, offset, node) -> None:
2685 """Override `Body.parse_field_body` for simpler parsing."""
2686 lines = []
2687 for line in list(indented) + ['']:
2688 if line.strip():
2689 lines.append(line)
2690 elif lines:
2691 text = '\n'.join(lines)
2692 node += nodes.paragraph(text, text)
2693 lines = []
2694
2695
2696class LineBlock(SpecializedBody):
2697
2698 """Second and subsequent lines of a line_block."""
2699
2700 blank = SpecializedBody.invalid_input
2701
2702 def line_block(self, match, context, next_state):
2703 """New line of line block."""
2704 lineno = self.state_machine.abs_line_number()
2705 line, messages, blank_finish = self.line_block_line(match, lineno)
2706 self.parent += line
2707 self.parent.parent += messages
2708 self.blank_finish = blank_finish
2709 return [], next_state, []
2710
2711
2712class Explicit(SpecializedBody):
2713
2714    """Second and subsequent explicit markup constructs."""
2715
2716 def explicit_markup(self, match, context, next_state):
2717 """Footnotes, hyperlink targets, directives, comments."""
2718 nodelist, blank_finish = self.explicit_construct(match)
2719 self.parent += nodelist
2720 self.blank_finish = blank_finish
2721 return [], next_state, []
2722
2723 def anonymous(self, match, context, next_state):
2724 """Anonymous hyperlink targets."""
2725 nodelist, blank_finish = self.anonymous_target(match)
2726 self.parent += nodelist
2727 self.blank_finish = blank_finish
2728 return [], next_state, []
2729
2730 blank = SpecializedBody.invalid_input
2731
2732
2733class SubstitutionDef(Body):
2734
2735 """
2736 Parser for the contents of a substitution_definition element.
2737 """
2738
2739 patterns = {
2740 'embedded_directive': re.compile(r'(%s)::( +|$)'
2741 % Inliner.simplename),
2742 'text': r''}
2743 initial_transitions = ['embedded_directive', 'text']
2744
2745 def embedded_directive(self, match, context, next_state):
2746 nodelist, blank_finish = self.directive(match,
2747 alt=self.parent['names'][0])
2748 self.parent += nodelist
2749 if not self.state_machine.at_eof():
2750 self.blank_finish = blank_finish
2751 raise EOFError
2752
2753 def text(self, match, context, next_state):
2754 if not self.state_machine.at_eof():
2755 self.blank_finish = self.state_machine.is_next_line_blank()
2756 raise EOFError
2757
2758
2759class Text(RSTState):
2760
2761 """
2762 Classifier of second line of a text block.
2763
2764 Could be a paragraph, a definition list item, or a title.
2765 """
2766
2767 patterns = {'underline': Body.patterns['line'],
2768 'text': r''}
2769 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2770
2771 def blank(self, match, context, next_state):
2772 """End of paragraph."""
2773 # NOTE: self.paragraph returns [node, system_message(s)], literalnext
2774 paragraph, literalnext = self.paragraph(
2775 context, self.state_machine.abs_line_number() - 1)
2776 self.parent += paragraph
2777 if literalnext:
2778 self.parent += self.literal_block()
2779 return [], 'Body', []
2780
2781 def eof(self, context):
2782 if context:
2783 self.blank(None, context, None)
2784 return []
2785
2786 def indent(self, match, context, next_state):
2787 """Definition list item."""
2788 dl = nodes.definition_list()
2789 # the definition list starts on the line before the indent:
2790 lineno = self.state_machine.abs_line_number() - 1
2791 dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
2792 dl_item, blank_finish = self.definition_list_item(context)
2793 dl += dl_item
2794 self.parent += dl
2795 offset = self.state_machine.line_offset + 1 # next line
2796 newline_offset, blank_finish = self.nested_list_parse(
2797 self.state_machine.input_lines[offset:],
2798 input_offset=self.state_machine.abs_line_offset() + 1,
2799 node=dl, initial_state='DefinitionList',
2800 blank_finish=blank_finish, blank_finish_state='Definition')
2801 self.goto_line(newline_offset)
2802 if not blank_finish:
2803 self.parent += self.unindent_warning('Definition list')
2804 return [], 'Body', []
2805
2806 def underline(self, match, context, next_state):
2807 """Section title."""
2808 lineno = self.state_machine.abs_line_number()
2809 title = context[0].rstrip()
2810 underline = match.string.rstrip()
2811 source = title + '\n' + underline
2812 messages = []
2813 if column_width(title) > len(underline):
2814 if len(underline) < 4:
2815 if self.state_machine.match_titles:
2816 msg = self.reporter.info(
2817 'Possible title underline, too short for the title.\n'
2818 "Treating it as ordinary text because it's so short.",
2819 line=lineno)
2820 self.parent += msg
2821 raise statemachine.TransitionCorrection('text')
2822 else:
2823 blocktext = context[0] + '\n' + self.state_machine.line
2824 msg = self.reporter.warning(
2825 'Title underline too short.',
2826 nodes.literal_block(blocktext, blocktext),
2827 line=lineno)
2828 messages.append(msg)
2829 if not self.state_machine.match_titles:
2830 blocktext = context[0] + '\n' + self.state_machine.line
2831 # We need get_source_and_line() here to report correctly
2832 src, srcline = self.state_machine.get_source_and_line()
2833 # TODO: why is abs_line_number() == srcline+1
2834 # if the error is in a table (try with test_tables.py)?
2835 # print("get_source_and_line", srcline)
2836 # print("abs_line_number", self.state_machine.abs_line_number())
2837 msg = self.reporter.error(
2838 'Unexpected section title.',
2839 nodes.literal_block(blocktext, blocktext),
2840 source=src, line=srcline)
2841 self.parent += messages
2842 self.parent += msg
2843 return [], next_state, []
2844 style = underline[0]
2845 context[:] = []
2846 self.section(title, source, style, lineno - 1, messages)
2847 return [], next_state, []
2848
2849 def text(self, match, context, next_state):
2850 """Paragraph."""
2851 startline = self.state_machine.abs_line_number() - 1
2852 msg = None
2853 try:
2854 block = self.state_machine.get_text_block(flush_left=True)
2855 except statemachine.UnexpectedIndentationError as err:
2856 block, src, srcline = err.args
2857 msg = self.reporter.error('Unexpected indentation.',
2858 source=src, line=srcline)
2859 lines = context + list(block)
2860 paragraph, literalnext = self.paragraph(lines, startline)
2861 self.parent += paragraph
2862 self.parent += msg
2863 if literalnext:
2864 try:
2865 self.state_machine.next_line()
2866 except EOFError:
2867 pass
2868 self.parent += self.literal_block()
2869 return [], next_state, []
2870
2871 def literal_block(self):
2872 """Return a list of nodes."""
2873 (indented, indent, offset, blank_finish
2874 ) = self.state_machine.get_indented()
2875 while indented and not indented[-1].strip():
2876 indented.trim_end()
2877 if not indented:
2878 return self.quoted_literal_block()
2879 data = '\n'.join(indented)
2880 literal_block = nodes.literal_block(data, data)
2881 (literal_block.source,
2882 literal_block.line) = self.state_machine.get_source_and_line(offset+1)
2883 nodelist = [literal_block]
2884 if not blank_finish:
2885 nodelist.append(self.unindent_warning('Literal block'))
2886 return nodelist
2887
2888 def quoted_literal_block(self):
2889 abs_line_offset = self.state_machine.abs_line_offset()
2890 offset = self.state_machine.line_offset
2891 parent_node = nodes.Element()
2892 new_abs_offset = self.nested_parse(
2893 self.state_machine.input_lines[offset:],
2894 input_offset=abs_line_offset, node=parent_node, match_titles=False,
2895 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2896 'initial_state': 'QuotedLiteralBlock'})
2897 self.goto_line(new_abs_offset)
2898 return parent_node.children
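    # A quoted (unindented) literal block, as delegated above (illustrative
    # example, not in the original source):
    #
    #     John Doe wrote::
    #
    #     >> Great idea!
    #     >
    #     > Why didn't I think of that?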
2899
2900 def definition_list_item(self, termline):
2901 # the parser is already on the second (indented) line:
2902 dd_lineno = self.state_machine.abs_line_number()
2903 dt_lineno = dd_lineno - 1
2904 (indented, indent, line_offset, blank_finish
2905 ) = self.state_machine.get_indented()
2906 dl_item = nodes.definition_list_item(
2907 '\n'.join(termline + list(indented)))
2908 (dl_item.source,
2909 dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
2910 dt_nodes, messages = self.term(termline, dt_lineno)
2911 dl_item += dt_nodes
2912 dd = nodes.definition('', *messages)
2913 dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
2914 dl_item += dd
2915 if termline[0][-2:] == '::':
2916 dd += self.reporter.info(
2917 'Blank line missing before literal block (after the "::")? '
2918 'Interpreted as a definition list item.',
2919 line=dd_lineno)
2920 # TODO: drop a definition if it is an empty comment to allow
2921 # definition list items with several terms?
2922 # https://sourceforge.net/p/docutils/feature-requests/60/
2923 self.nested_parse(indented, input_offset=line_offset, node=dd)
2924 return dl_item, blank_finish
2925
2926 classifier_delimiter = re.compile(' +: +')
2927
2928 def term(self, lines, lineno):
2929 """Return a definition_list's term and optional classifiers."""
2930 assert len(lines) == 1
2931 text_nodes, messages = self.inline_text(lines[0], lineno)
2932 dt = nodes.term(lines[0])
2933 dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
2934 node_list = [dt]
2935 for i in range(len(text_nodes)):
2936 node = text_nodes[i]
2937 if isinstance(node, nodes.Text):
2938 parts = self.classifier_delimiter.split(node)
2939 if len(parts) == 1:
2940 node_list[-1] += node
2941 else:
2942 text = parts[0].rstrip()
2943 textnode = nodes.Text(text)
2944 node_list[-1] += textnode
2945 node_list += [nodes.classifier(unescape(part, True), part)
2946 for part in parts[1:]]
2947 else:
2948 node_list[-1] += node
2949 return node_list, messages
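    # A definition-list term with classifiers, split on " : " above
    # (illustrative example, not in the original source):
    #
    #     term : classifier one : classifier two
    #         Definition paragraph.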
2950
2951
2952class SpecializedText(Text):
2953
2954 """
2955 Superclass for second and subsequent lines of Text-variants.
2956
2957 All transition methods are disabled. Override individual methods in
2958 subclasses to re-enable.
2959 """
2960
2961 def eof(self, context):
2962 """Incomplete construct."""
2963 return []
2964
2965 def invalid_input(self, match=None, context=None, next_state=None):
2966 """Not a compound element member. Abort this state machine."""
2967 raise EOFError
2968
2969 blank = invalid_input
2970 indent = invalid_input
2971 underline = invalid_input
2972 text = invalid_input
2973
2974
2975class Definition(SpecializedText):
2976
2977 """Second line of potential definition_list_item."""
2978
2979 def eof(self, context):
2980 """Not a definition."""
2981 self.state_machine.previous_line(2) # so parent SM can reassess
2982 return []
2983
2984 def indent(self, match, context, next_state):
2985 """Definition list item."""
2986 dl_item, blank_finish = self.definition_list_item(context)
2987 self.parent += dl_item
2988 self.blank_finish = blank_finish
2989 return [], 'DefinitionList', []
2990
2991
2992class Line(SpecializedText):
2993
2994 """
2995 Second line of over- & underlined section title or transition marker.
2996 """
2997
2998 eofcheck = 1 # ignored, will be removed in Docutils 2.0.
2999
3000 def eof(self, context):
3001 """Transition marker at end of section or document."""
3002 marker = context[0].strip()
3003 if len(marker) < 4:
3004 self.state_correction(context)
3005 src, srcline = self.state_machine.get_source_and_line()
3006 # lineno = self.state_machine.abs_line_number() - 1
3007 transition = nodes.transition(rawsource=context[0])
3008 transition.source = src
3009 transition.line = srcline - 1
3010 # transition.line = lineno
3011 self.parent += transition
3012 return []
3013
3014 def blank(self, match, context, next_state):
3015 """Transition marker."""
3016 src, srcline = self.state_machine.get_source_and_line()
3017 marker = context[0].strip()
3018 if len(marker) < 4:
3019 self.state_correction(context)
3020 transition = nodes.transition(rawsource=marker)
3021 transition.source = src
3022 transition.line = srcline - 1
3023 self.parent += transition
3024 return [], 'Body', []
3025
3026 def text(self, match, context, next_state):
3027 """Potential over- & underlined title."""
3028 lineno = self.state_machine.abs_line_number() - 1
3029 overline = context[0]
3030 title = match.string
3031 underline = ''
3032 try:
3033 underline = self.state_machine.next_line()
3034 except EOFError:
3035 blocktext = overline + '\n' + title
3036 if len(overline.rstrip()) < 4:
3037 self.short_overline(context, blocktext, lineno, 2)
3038 else:
3039 msg = self.reporter.error(
3040 'Incomplete section title.',
3041 nodes.literal_block(blocktext, blocktext),
3042 line=lineno)
3043 self.parent += msg
3044 return [], 'Body', []
3045 source = '%s\n%s\n%s' % (overline, title, underline)
3046 overline = overline.rstrip()
3047 underline = underline.rstrip()
3048 if not self.transitions['underline'][0].match(underline):
3049 blocktext = overline + '\n' + title + '\n' + underline
3050 if len(overline.rstrip()) < 4:
3051 self.short_overline(context, blocktext, lineno, 2)
3052 else:
3053 msg = self.reporter.error(
3054 'Missing matching underline for section title overline.',
3055 nodes.literal_block(source, source),
3056 line=lineno)
3057 self.parent += msg
3058 return [], 'Body', []
3059 elif overline != underline:
3060 blocktext = overline + '\n' + title + '\n' + underline
3061 if len(overline.rstrip()) < 4:
3062 self.short_overline(context, blocktext, lineno, 2)
3063 else:
3064 msg = self.reporter.error(
3065 'Title overline & underline mismatch.',
3066 nodes.literal_block(source, source),
3067 line=lineno)
3068 self.parent += msg
3069 return [], 'Body', []
3070 title = title.rstrip()
3071 messages = []
3072 if column_width(title) > len(overline):
3073 blocktext = overline + '\n' + title + '\n' + underline
3074 if len(overline.rstrip()) < 4:
3075 self.short_overline(context, blocktext, lineno, 2)
3076 else:
3077 msg = self.reporter.warning(
3078 'Title overline too short.',
3079 nodes.literal_block(source, source),
3080 line=lineno)
3081 messages.append(msg)
3082 style = (overline[0], underline[0])
3083 self.section(title.lstrip(), source, style, lineno + 1, messages)
3084 return [], 'Body', []
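    # An over- & underlined section title as handled above (illustrative
    # example, not in the original source); overline and underline must
    # match, and a too-short overline only produces a warning:
    #
    #     ===============
    #     Section Title
    #     ===============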
3085
3086 indent = text # indented title
3087
3088 def underline(self, match, context, next_state):
3089 overline = context[0]
3090 blocktext = overline + '\n' + self.state_machine.line
3091 lineno = self.state_machine.abs_line_number() - 1
3092 if len(overline.rstrip()) < 4:
3093 self.short_overline(context, blocktext, lineno, 1)
3094 msg = self.reporter.error(
3095 'Invalid section title or transition marker.',
3096 nodes.literal_block(blocktext, blocktext),
3097 line=lineno)
3098 self.parent += msg
3099 return [], 'Body', []
3100
3101 def short_overline(self, context, blocktext, lineno, lines=1) -> None:
3102 msg = self.reporter.info(
3103 'Possible incomplete section title.\nTreating the overline as '
3104 "ordinary text because it's so short.",
3105 line=lineno)
3106 self.parent += msg
3107 self.state_correction(context, lines)
3108
3109 def state_correction(self, context, lines=1):
3110 self.state_machine.previous_line(lines)
3111 context[:] = []
3112 raise statemachine.StateCorrection('Body', 'text')
3113
3114
3115class QuotedLiteralBlock(RSTState):
3116
3117 """
3118 Nested parse handler for quoted (unindented) literal blocks.
3119
3120 Special-purpose. Not for inclusion in `state_classes`.
3121 """
3122
3123 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
3124 'text': r''}
3125 initial_transitions = ('initial_quoted', 'text')
3126
3127 def __init__(self, state_machine, debug=False) -> None:
3128 RSTState.__init__(self, state_machine, debug)
3129 self.messages = []
3130 self.initial_lineno = None
3131
3132 def blank(self, match, context, next_state):
3133 if context:
3134 raise EOFError
3135 else:
3136 return context, next_state, []
3137
3138 def eof(self, context):
3139 if context:
3140 src, srcline = self.state_machine.get_source_and_line(
3141 self.initial_lineno)
3142 text = '\n'.join(context)
3143 literal_block = nodes.literal_block(text, text)
3144 literal_block.source = src
3145 literal_block.line = srcline
3146 self.parent += literal_block
3147 else:
3148 self.parent += self.reporter.warning(
3149 'Literal block expected; none found.',
3150 line=self.state_machine.abs_line_number()
3151 ) # src not available, statemachine.input_lines is empty
3152 self.state_machine.previous_line()
3153 self.parent += self.messages
3154 return []
3155
3156 def indent(self, match, context, next_state):
3157 assert context, ('QuotedLiteralBlock.indent: context should not '
3158 'be empty!')
3159 self.messages.append(
3160 self.reporter.error('Unexpected indentation.',
3161 line=self.state_machine.abs_line_number()))
3162 self.state_machine.previous_line()
3163 raise EOFError
3164
3165 def initial_quoted(self, match, context, next_state):
3166 """Match arbitrary quote character on the first line only."""
3167 self.remove_transition('initial_quoted')
3168 quote = match.string[0]
3169 pattern = re.compile(re.escape(quote))
3170 # New transition matches consistent quotes only:
3171 self.add_transition('quoted',
3172 (pattern, self.quoted, self.__class__.__name__))
3173 self.initial_lineno = self.state_machine.abs_line_number()
3174 return [match.string], next_state, []
3175
3176 def quoted(self, match, context, next_state):
3177 """Match consistent quotes on subsequent lines."""
3178 context.append(match.string)
3179 return context, next_state, []
3180
3181 def text(self, match, context, next_state):
3182 if context:
3183 self.messages.append(
3184 self.reporter.error('Inconsistent literal block quoting.',
3185 line=self.state_machine.abs_line_number()))
3186 self.state_machine.previous_line()
3187 raise EOFError
3188
3189
3190state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
3191 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
3192 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
3193"""Standard set of State classes used to start `RSTStateMachine`."""