1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
6This is the ``docutils.parsers.rst.states`` module, the core of
7the reStructuredText parser. It defines the following:
8
9:Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
    - `BulletList`: Second and subsequent bullet_list list_items.
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: obsolete, use `types.SimpleNamespace`.
30
31:Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
35
36:Functions:
    - `escape2null()`: Return a string with escape-backslashes converted
      to nulls.
    - `unescape()`: Return a string with nulls removed or restored to
      backslashes.
39
40:Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
42
43Parser Overview
44===============
45
46The reStructuredText parser is implemented as a recursive state machine,
47examining its input one line at a time. To understand how the parser works,
48please first become familiar with the `docutils.statemachine` module. In the
49description below, references are made to classes defined in this module;
50please see the individual classes for details.
51
52Parsing proceeds as follows:
53
541. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
58
592. The method associated with the matched transition pattern is called.
60
61 A. Some transition methods are self-contained, appending elements to the
      document tree (e.g., `Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
65
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
70
71 - In the case of lists and explicit markup, a one-off state machine is
        created and run to parse the contents of the first item.
73
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
83
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
86
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
90
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
93
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
97
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
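
For orientation, this machinery is normally driven through the public
`docutils.parsers.rst.Parser` class rather than by instantiating
`RSTStateMachine` directly.  A minimal sketch, using the standard
`docutils.frontend` and `docutils.utils` helpers to supply settings and an
empty document::

    from docutils.frontend import get_default_settings
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    parser = Parser()
    settings = get_default_settings(Parser)
    document = new_document('<sketch>', settings)
    parser.parse('A paragraph with *emphasis*.', document)
    # `document` now holds the node tree built by `RSTStateMachine`,
    # the `Body` family of states, and `Inliner`.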
101"""
102
103from __future__ import annotations
104
105__docformat__ = 'reStructuredText'
106
107import re
108from types import FunctionType, MethodType
109from types import SimpleNamespace as Struct
110
111from docutils import nodes, statemachine, utils
112from docutils import ApplicationError, DataError
113from docutils.statemachine import StateMachineWS, StateWS
114from docutils.nodes import fully_normalize_name as normalize_name
115from docutils.nodes import unescape, whitespace_normalize_name
116import docutils.parsers.rst
117from docutils.parsers.rst import directives, languages, tableparser, roles
118from docutils.utils import escape2null, column_width
119from docutils.utils import punctuation_chars, urischemes
120from docutils.utils import split_escaped_whitespace
121from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
122 RomanNumeral)
123
124
125class MarkupError(DataError): pass
126class UnknownInterpretedRoleError(DataError): pass
127class InterpretedRoleNotImplementedError(DataError): pass
128class ParserError(ApplicationError): pass
129class MarkupMismatch(Exception): pass
130
131
132class RSTStateMachine(StateMachineWS):
133
134 """
135 reStructuredText's master StateMachine.
136
137 The entry point to reStructuredText parsing is the `run()` method.
138 """
139
140 def run(self, input_lines, document, input_offset=0, match_titles=True,
141 inliner=None) -> None:
142 """
143 Parse `input_lines` and modify the `document` node in place.
144
145 Extend `StateMachineWS.run()`: set up parse-global data and
146 run the StateMachine.
147 """
148 self.language = languages.get_language(
149 document.settings.language_code, document.reporter)
150 self.match_titles = match_titles
151 if inliner is None:
152 inliner = Inliner()
153 inliner.init_customizations(document.settings)
154 self.memo = Struct(document=document,
155 reporter=document.reporter,
156 language=self.language,
157 title_styles=[],
158 section_level=0, # ignored, to be removed in 2.0
159 section_bubble_up_kludge=False, # ignored, ""
160 inliner=inliner)
161 self.document = document
162 self.attach_observer(document.note_source)
163 self.reporter = self.memo.reporter
164 self.node = document
165 results = StateMachineWS.run(self, input_lines, input_offset,
166 input_source=document['source'])
167 assert results == [], 'RSTStateMachine.run() results should be empty!'
168 self.node = self.memo = None # remove unneeded references
169
170
171class NestedStateMachine(StateMachineWS):
172
173 """
174 StateMachine run from within other StateMachine runs, to parse nested
175 document structures.
176 """
177
178 def run(self, input_lines, input_offset, memo, node, match_titles=True):
179 """
        Parse `input_lines` and populate the given `node`.
181
182 Extend `StateMachineWS.run()`: set up document-wide data.
183 """
184 self.match_titles = match_titles
185 self.memo = memo
186 self.document = memo.document
187 self.attach_observer(self.document.note_source)
188 self.reporter = memo.reporter
189 self.language = memo.language
190 self.node = node
191 results = StateMachineWS.run(self, input_lines, input_offset)
192 assert results == [], ('NestedStateMachine.run() results should be '
193 'empty!')
194 return results
195
196
197class RSTState(StateWS):
198
199 """
200 reStructuredText State superclass.
201
202 Contains methods used by all State subclasses.
203 """
204
205 nested_sm = NestedStateMachine
206 nested_sm_cache = []
207
208 def __init__(self, state_machine, debug=False) -> None:
209 self.nested_sm_kwargs = {'state_classes': state_classes,
210 'initial_state': 'Body'}
211 StateWS.__init__(self, state_machine, debug)
212
213 def runtime_init(self) -> None:
214 StateWS.runtime_init(self)
215 memo = self.state_machine.memo
216 self.memo = memo
217 self.reporter = memo.reporter
218 self.inliner = memo.inliner
219 self.document = memo.document
220 self.parent = self.state_machine.node
221 # enable the reporter to determine source and source-line
222 if not hasattr(self.reporter, 'get_source_and_line'):
223 self.reporter.get_source_and_line = self.state_machine.get_source_and_line # noqa:E501
224
225 def goto_line(self, abs_line_offset) -> None:
226 """
227 Jump to input line `abs_line_offset`, ignoring jumps past the end.
228 """
229 try:
230 self.state_machine.goto_line(abs_line_offset)
231 except EOFError:
232 pass
233
234 def no_match(self, context, transitions):
235 """
236 Override `StateWS.no_match` to generate a system message.
237
238 This code should never be run.
239 """
240 self.reporter.severe(
241 'Internal error: no transition pattern match. State: "%s"; '
242 'transitions: %s; context: %s; current line: %r.'
243 % (self.__class__.__name__, transitions, context,
244 self.state_machine.line))
245 return context, None, []
246
247 def bof(self, context):
248 """Called at beginning of file."""
249 return [], []
250
251 def nested_parse(self, block, input_offset, node, match_titles=False,
252 state_machine_class=None, state_machine_kwargs=None):
253 """
254 Create a new StateMachine rooted at `node` and run it over the input
255 `block`.
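
        A typical use, mirroring `Body.block_quote`, hands an indented
        `StringList` and its input offset to a freshly created child node
        (a sketch; `indented` and `line_offset` come from the calling
        state)::

            node = nodes.block_quote()
            new_offset = self.nested_parse(indented, input_offset=line_offset,
                                           node=node)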
256 """
257 use_default = 0
258 if state_machine_class is None:
259 state_machine_class = self.nested_sm
260 use_default += 1
261 if state_machine_kwargs is None:
262 state_machine_kwargs = self.nested_sm_kwargs
263 use_default += 1
264 block_length = len(block)
265
266 state_machine = None
267 if use_default == 2:
268 try:
269 state_machine = self.nested_sm_cache.pop()
270 except IndexError:
271 pass
272 if not state_machine:
273 state_machine = state_machine_class(debug=self.debug,
274 **state_machine_kwargs)
275 state_machine.run(block, input_offset, memo=self.memo,
276 node=node, match_titles=match_titles)
277 if use_default == 2:
278 self.nested_sm_cache.append(state_machine)
279 else:
280 state_machine.unlink()
281 new_offset = state_machine.abs_line_offset()
282 # No `block.parent` implies disconnected -- lines aren't in sync:
283 if block.parent and (len(block) - block_length) != 0:
284 # Adjustment for block if modified in nested parse:
285 self.state_machine.next_line(len(block) - block_length)
286 return new_offset
287
288 def nested_list_parse(self, block, input_offset, node, initial_state,
289 blank_finish,
290 blank_finish_state=None,
291 extra_settings={},
292 match_titles=False,
293 state_machine_class=None,
294 state_machine_kwargs=None):
295 """
296 Create a new StateMachine rooted at `node` and run it over the input
297 `block`. Also keep track of optional intermediate blank lines and the
298 required final one.
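
        A typical call, as made by `Body.bullet` for the remainder of a
        bullet list after its first item (a sketch; `bulletlist` stands for
        the already appended `nodes.bullet_list`)::

            new_offset, blank_finish = self.nested_list_parse(
                self.state_machine.input_lines[offset:],
                input_offset=self.state_machine.abs_line_offset() + 1,
                node=bulletlist, initial_state='BulletList',
                blank_finish=blank_finish)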
299 """
300 if state_machine_class is None:
301 state_machine_class = self.nested_sm
302 if state_machine_kwargs is None:
303 state_machine_kwargs = self.nested_sm_kwargs.copy()
304 state_machine_kwargs['initial_state'] = initial_state
305 state_machine = state_machine_class(debug=self.debug,
306 **state_machine_kwargs)
307 if blank_finish_state is None:
308 blank_finish_state = initial_state
309 state_machine.states[blank_finish_state].blank_finish = blank_finish
310 for key, value in extra_settings.items():
311 setattr(state_machine.states[initial_state], key, value)
312 state_machine.run(block, input_offset, memo=self.memo,
313 node=node, match_titles=match_titles)
314 blank_finish = state_machine.states[blank_finish_state].blank_finish
315 state_machine.unlink()
316 return state_machine.abs_line_offset(), blank_finish
317
318 def section(self, title, source, style, lineno, messages) -> None:
319 """Check for a valid subsection and create one if it checks out."""
320 if self.check_subsection(source, style, lineno):
321 self.new_subsection(title, lineno, messages)
322
323 def check_subsection(self, source, style, lineno) -> bool:
324 """
325 Check for a valid subsection header. Update section data in `memo`.
326
327 When a new section is reached that isn't a subsection of the current
328 section, set `self.parent` to the new section's parent section
329 (or the document if the new section is a top-level section).
330 """
331 title_styles = self.memo.title_styles
332 parent_sections = self.parent.section_hierarchy()
333 # current section level: (0 document, 1 section, 2 subsection, ...)
334 mylevel = len(parent_sections)
335 # Determine the level of the new section:
336 try: # check for existing title style
337 level = title_styles.index(style) + 1
338 except ValueError: # new title style
339 title_styles.append(style)
340 level = len(title_styles)
341 # The new level must not be deeper than an immediate child
342 # of the current level:
343 if level > mylevel + 1:
344 styles = " ".join("/".join(s for s in style)
345 for style in title_styles)
346 self.parent += self.reporter.severe(
347 'Inconsistent title style:'
348 f' skip from level {mylevel} to {level}.',
349 nodes.literal_block('', source),
350 nodes.paragraph('', f'Established title styles: {styles}'),
351 line=lineno)
352 return False
353 # Update parent state:
354 self.memo.section_level = level
355 if level <= mylevel:
356 # new section is sibling or higher up in the section hierarchy
357 self.parent = parent_sections[level-1].parent
358 return True
359
360 def title_inconsistent(self, sourcetext, lineno):
361 # Ignored. Will be removed in Docutils 2.0.
362 error = self.reporter.severe(
363 'Title level inconsistent:', nodes.literal_block('', sourcetext),
364 line=lineno)
365 return error
366
367 def new_subsection(self, title, lineno, messages):
368 """Append new subsection to document tree."""
369 section_node = nodes.section()
370 self.parent += section_node
371 textnodes, title_messages = self.inline_text(title, lineno)
372 titlenode = nodes.title(title, '', *textnodes)
373 name = normalize_name(titlenode.astext())
374 section_node['names'].append(name)
375 section_node += titlenode
376 section_node += messages
377 section_node += title_messages
378 self.document.note_implicit_target(section_node, section_node)
379 # Update state:
380 self.state_machine.node = section_node
381 # Also update the ".parent" attribute in all states.
382 # This is a bit violent, but the state classes copy their .parent from
383 # state_machine.node on creation, so we need to update them. We could
384 # also remove RSTState.parent entirely and replace references to it
385 # with statemachine.node, but that might break code downstream of
386 # docutils.
387 for s in self.state_machine.states.values():
388 s.parent = section_node
389
390 def paragraph(self, lines, lineno):
391 """
392 Return a list (paragraph & messages) & a boolean: literal_block next?
393 """
        data = '\n'.join(lines).rstrip()
        if re.search(r'(?<!\\)(\\\\)*::$', data):
            # Paragraph ends with an unescaped "::" -> a literal block follows.
            if len(data) == 2:
                # The paragraph is just "::"; no paragraph text remains.
                return [], 1
            elif data[-3] in ' \n':
                # "text ::" -> drop the marker and any preceding whitespace.
                text = data[:-3].rstrip()
            else:
                # "text::" -> keep a single ":" at the end of the paragraph.
                text = data[:-1]
            literalnext = 1
        else:
            text = data
            literalnext = 0
406 textnodes, messages = self.inline_text(text, lineno)
407 p = nodes.paragraph(data, '', *textnodes)
408 p.source, p.line = self.state_machine.get_source_and_line(lineno)
409 return [p] + messages, literalnext
410
411 def inline_text(self, text, lineno):
412 """
413 Return 2 lists: nodes (text and inline elements), and system_messages.
414 """
415 nodes, messages = self.inliner.parse(text, lineno,
416 self.memo, self.parent)
417 return nodes, messages
418
419 def unindent_warning(self, node_name):
420 # the actual problem is one line below the current line
421 lineno = self.state_machine.abs_line_number() + 1
422 return self.reporter.warning('%s ends without a blank line; '
423 'unexpected unindent.' % node_name,
424 line=lineno)
425
426
427def build_regexp(definition, compile_patterns=True):
428 """
429 Build, compile and return a regular expression based on `definition`.
430
431 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
432 where "parts" is a list of regular expressions and/or regular
433 expression definitions to be joined into an or-group.
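
    For example, a nested definition expands into nested named or-groups
    (names and patterns here are purely illustrative)::

        build_regexp(('out', '^', '$',
                      ['foo', ('in', '<', '>', ['bar', 'baz'])]),
                     compile_patterns=False)
        # -> '^(?P<out>foo|<(?P<in>bar|baz)>)$'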
434 """
435 name, prefix, suffix, parts = definition
436 part_strings = []
437 for part in parts:
438 if isinstance(part, tuple):
439 part_strings.append(build_regexp(part, None))
440 else:
441 part_strings.append(part)
442 or_group = '|'.join(part_strings)
443 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
444 if compile_patterns:
445 return re.compile(regexp)
446 else:
447 return regexp
448
449
450class Inliner:
451
452 """
453 Parse inline markup; call the `parse()` method.
454 """
455
456 def __init__(self) -> None:
457 self.implicit_dispatch = []
458 """List of (pattern, bound method) tuples, used by
459 `self.implicit_inline`."""
460
461 def init_customizations(self, settings) -> None:
462 # lookahead and look-behind expressions for inline markup rules
463 if getattr(settings, 'character_level_inline_markup', False):
464 start_string_prefix = '(^|(?<!\x00))'
465 end_string_suffix = ''
466 else:
467 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
468 (punctuation_chars.openers,
469 punctuation_chars.delimiters))
470 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
471 (punctuation_chars.closing_delimiters,
472 punctuation_chars.delimiters,
473 punctuation_chars.closers))
474 args = locals().copy()
475 args.update(vars(self.__class__))
476
477 parts = ('initial_inline', start_string_prefix, '',
478 [
479 ('start', '', self.non_whitespace_after, # simple start-strings
480 [r'\*\*', # strong
481 r'\*(?!\*)', # emphasis but not strong
482 r'``', # literal
483 r'_`', # inline internal target
484 r'\|(?!\|)'] # substitution reference
485 ),
486 ('whole', '', end_string_suffix, # whole constructs
487 [ # reference name & end-string
488 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
489 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
490 [r'[0-9]+', # manually numbered
491 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)
492 r'\*', # auto-symbol
493 r'(?P<citationlabel>%s)' % self.simplename, # citation ref
494 ]
495 )
496 ]
497 ),
498 ('backquote', # interpreted text or phrase reference
499 '(?P<role>(:%s:)?)' % self.simplename, # optional role
500 self.non_whitespace_after,
501 ['`(?!`)'] # but not literal
502 )
503 ]
504 )
505 self.start_string_prefix = start_string_prefix
506 self.end_string_suffix = end_string_suffix
507 self.parts = parts
508
509 self.patterns = Struct(
510 initial=build_regexp(parts),
511 emphasis=re.compile(self.non_whitespace_escape_before
512 + r'(\*)' + end_string_suffix),
513 strong=re.compile(self.non_whitespace_escape_before
514 + r'(\*\*)' + end_string_suffix),
515 interpreted_or_phrase_ref=re.compile(
516 r"""
517 %(non_unescaped_whitespace_escape_before)s
518 (
519 `
520 (?P<suffix>
521 (?P<role>:%(simplename)s:)?
522 (?P<refend>__?)?
523 )
524 )
525 %(end_string_suffix)s
526 """ % args, re.VERBOSE),
527 embedded_link=re.compile(
528 r"""
529 (
530 (?:[ \n]+|^) # spaces or beginning of line/string
531 < # open bracket
532 %(non_whitespace_after)s
533 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
534 %(non_whitespace_escape_before)s
535 > # close bracket
536 )
537 $ # end of string
538 """ % args, re.VERBOSE),
539 literal=re.compile(self.non_whitespace_before + '(``)'
540 + end_string_suffix),
541 target=re.compile(self.non_whitespace_escape_before
542 + r'(`)' + end_string_suffix),
543 substitution_ref=re.compile(self.non_whitespace_escape_before
544 + r'(\|_{0,2})'
545 + end_string_suffix),
546 email=re.compile(self.email_pattern % args + '$',
547 re.VERBOSE),
548 uri=re.compile(
549 (r"""
550 %(start_string_prefix)s
551 (?P<whole>
552 (?P<absolute> # absolute URI
553 (?P<scheme> # scheme (http, ftp, mailto)
554 [a-zA-Z][a-zA-Z0-9.+-]*
555 )
556 :
557 (
558 ( # either:
559 (//?)? # hierarchical URI
560 %(uric)s* # URI characters
561 %(uri_end)s # final URI char
562 )
563 ( # optional query
564 \?%(uric)s*
565 %(uri_end)s
566 )?
567 ( # optional fragment
568 \#%(uric)s*
569 %(uri_end)s
570 )?
571 )
572 )
573 | # *OR*
574 (?P<email> # email address
575 """ + self.email_pattern + r"""
576 )
577 )
578 %(end_string_suffix)s
579 """) % args, re.VERBOSE),
580 pep=re.compile(
581 r"""
582 %(start_string_prefix)s
583 (
584 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
585 |
586 (PEP\s+(?P<pepnum2>\d+)) # reference by name
587 )
588 %(end_string_suffix)s""" % args, re.VERBOSE),
589 rfc=re.compile(
590 r"""
591 %(start_string_prefix)s
592 (RFC(-|\s+)?(?P<rfcnum>\d+))
593 %(end_string_suffix)s""" % args, re.VERBOSE))
594
595 self.implicit_dispatch.append((self.patterns.uri,
596 self.standalone_uri))
597 if settings.pep_references:
598 self.implicit_dispatch.append((self.patterns.pep,
599 self.pep_reference))
600 if settings.rfc_references:
601 self.implicit_dispatch.append((self.patterns.rfc,
602 self.rfc_reference))
603
604 def parse(self, text, lineno, memo, parent):
605 # Needs to be refactored for nested inline markup.
606 # Add nested_parse() method?
607 """
608 Return 2 lists: nodes (text and inline elements), and system_messages.
609
610 Using `self.patterns.initial`, a pattern which matches start-strings
611 (emphasis, strong, interpreted, phrase reference, literal,
612 substitution reference, and inline target) and complete constructs
613 (simple reference, footnote reference), search for a candidate. When
614 one is found, check for validity (e.g., not a quoted '*' character).
615 If valid, search for the corresponding end string if applicable, and
616 check it for validity. If not found or invalid, generate a warning
617 and ignore the start-string. Implicit inline markup (e.g. standalone
618 URIs) is found last.
619
620 :text: source string
621 :lineno: absolute line number, cf. `statemachine.get_source_and_line()`
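
        A sketch of the call as made by `RSTState.inline_text()` (the state
        supplies `memo` and `parent`)::

            nodes, messages = self.inliner.parse(text, lineno,
                                                 self.memo, self.parent)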
622 """
623 self.reporter = memo.reporter
624 self.document = memo.document
625 self.language = memo.language
626 self.parent = parent
627 pattern_search = self.patterns.initial.search
628 dispatch = self.dispatch
629 remaining = escape2null(text)
630 processed = []
631 unprocessed = []
632 messages = []
633 while remaining:
634 match = pattern_search(remaining)
635 if match:
636 groups = match.groupdict()
637 method = dispatch[groups['start'] or groups['backquote']
638 or groups['refend'] or groups['fnend']]
639 before, inlines, remaining, sysmessages = method(self, match,
640 lineno)
641 unprocessed.append(before)
642 messages += sysmessages
643 if inlines:
644 processed += self.implicit_inline(''.join(unprocessed),
645 lineno)
646 processed += inlines
647 unprocessed = []
648 else:
649 break
650 remaining = ''.join(unprocessed) + remaining
651 if remaining:
652 processed += self.implicit_inline(remaining, lineno)
653 return processed, messages
654
655 # Inline object recognition
656 # -------------------------
657 # See also init_customizations().
658 non_whitespace_before = r'(?<!\s)'
659 non_whitespace_escape_before = r'(?<![\s\x00])'
660 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
661 non_whitespace_after = r'(?!\s)'
662 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
663 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
664 # Valid URI characters (see RFC 2396 & RFC 2732);
665 # final \x00 allows backslash escapes in URIs:
666 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
667 # Delimiter indicating the end of a URI (not part of the URI):
668 uri_end_delim = r"""[>]"""
669 # Last URI character; same as uric but no punctuation:
670 urilast = r"""[_~*/=+a-zA-Z0-9]"""
671 # End of a URI (either 'urilast' or 'uric followed by a
672 # uri_end_delim'):
673 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
674 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
675 email_pattern = r"""
676 %(emailc)s+(?:\.%(emailc)s+)* # name
677 (?<!\x00)@ # at
678 %(emailc)s+(?:\.%(emailc)s*)* # host
679 %(uri_end)s # final URI char
680 """
681
682 def quoted_start(self, match):
683 """Test if inline markup start-string is 'quoted'.
684
685 'Quoted' in this context means the start-string is enclosed in a pair
686 of matching opening/closing delimiters (not necessarily quotes)
687 or at the end of the match.
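
        For example, the asterisk in ``'*'`` or ``(*)`` is "quoted" by the
        surrounding matching delimiters and is not treated as an emphasis
        start-string.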
688 """
689 string = match.string
690 start = match.start()
691 if start == 0: # start-string at beginning of text
692 return False
693 prestart = string[start - 1]
694 try:
695 poststart = string[match.end()]
696 except IndexError: # start-string at end of text
697 return True # not "quoted" but no markup start-string either
698 return punctuation_chars.match_chars(prestart, poststart)
699
700 def inline_obj(self, match, lineno, end_pattern, nodeclass,
701 restore_backslashes=False):
702 string = match.string
703 matchstart = match.start('start')
704 matchend = match.end('start')
705 if self.quoted_start(match):
706 return string[:matchend], [], string[matchend:], [], ''
707 endmatch = end_pattern.search(string[matchend:])
708 if endmatch and endmatch.start(1): # 1 or more chars
709 text = endmatch.string[:endmatch.start(1)]
710 if restore_backslashes:
711 text = unescape(text, True)
712 textend = matchend + endmatch.end(1)
713 rawsource = unescape(string[matchstart:textend], True)
714 node = nodeclass(rawsource, text)
715 return (string[:matchstart], [node],
716 string[textend:], [], endmatch.group(1))
717 msg = self.reporter.warning(
718 'Inline %s start-string without end-string.'
719 % nodeclass.__name__, line=lineno)
720 text = unescape(string[matchstart:matchend], True)
721 prb = self.problematic(text, text, msg)
722 return string[:matchstart], [prb], string[matchend:], [msg], ''
723
724 def problematic(self, text, rawsource, message):
725 msgid = self.document.set_id(message, self.parent)
726 problematic = nodes.problematic(rawsource, text, refid=msgid)
727 prbid = self.document.set_id(problematic)
728 message.add_backref(prbid)
729 return problematic
730
731 def emphasis(self, match, lineno):
732 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
733 match, lineno, self.patterns.emphasis, nodes.emphasis)
734 return before, inlines, remaining, sysmessages
735
736 def strong(self, match, lineno):
737 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
738 match, lineno, self.patterns.strong, nodes.strong)
739 return before, inlines, remaining, sysmessages
740
741 def interpreted_or_phrase_ref(self, match, lineno):
742 end_pattern = self.patterns.interpreted_or_phrase_ref
743 string = match.string
744 matchstart = match.start('backquote')
745 matchend = match.end('backquote')
746 rolestart = match.start('role')
747 role = match.group('role')
748 position = ''
749 if role:
750 role = role[1:-1]
751 position = 'prefix'
752 elif self.quoted_start(match):
753 return string[:matchend], [], string[matchend:], []
754 endmatch = end_pattern.search(string[matchend:])
755 if endmatch and endmatch.start(1): # 1 or more chars
756 textend = matchend + endmatch.end()
757 if endmatch.group('role'):
758 if role:
759 msg = self.reporter.warning(
760 'Multiple roles in interpreted text (both '
761 'prefix and suffix present; only one allowed).',
762 line=lineno)
763 text = unescape(string[rolestart:textend], True)
764 prb = self.problematic(text, text, msg)
765 return string[:rolestart], [prb], string[textend:], [msg]
766 role = endmatch.group('suffix')[1:-1]
767 position = 'suffix'
768 escaped = endmatch.string[:endmatch.start(1)]
769 rawsource = unescape(string[matchstart:textend], True)
770 if rawsource[-1:] == '_':
771 if role:
772 msg = self.reporter.warning(
773 'Mismatch: both interpreted text role %s and '
774 'reference suffix.' % position, line=lineno)
775 text = unescape(string[rolestart:textend], True)
776 prb = self.problematic(text, text, msg)
777 return string[:rolestart], [prb], string[textend:], [msg]
778 return self.phrase_ref(string[:matchstart], string[textend:],
779 rawsource, escaped)
780 else:
781 rawsource = unescape(string[rolestart:textend], True)
782 nodelist, messages = self.interpreted(rawsource, escaped, role,
783 lineno)
784 return (string[:rolestart], nodelist,
785 string[textend:], messages)
786 msg = self.reporter.warning(
787 'Inline interpreted text or phrase reference start-string '
788 'without end-string.', line=lineno)
789 text = unescape(string[matchstart:matchend], True)
790 prb = self.problematic(text, text, msg)
791 return string[:matchstart], [prb], string[matchend:], [msg]
792
793 def phrase_ref(self, before, after, rawsource, escaped, text=None):
794 # `text` is ignored (since 0.16)
795 match = self.patterns.embedded_link.search(escaped)
796 if match: # embedded <URI> or <alias_>
797 text = escaped[:match.start(0)]
798 unescaped = unescape(text)
799 rawtext = unescape(text, True)
800 aliastext = match.group(2)
801 rawaliastext = unescape(aliastext, True)
802 underscore_escaped = rawaliastext.endswith(r'\_')
803 if (aliastext.endswith('_')
804 and not (underscore_escaped
805 or self.patterns.uri.match(aliastext))):
806 aliastype = 'name'
807 alias = normalize_name(unescape(aliastext[:-1]))
808 target = nodes.target(match.group(1), refname=alias)
809 target.indirect_reference_name = whitespace_normalize_name(
810 unescape(aliastext[:-1]))
811 else:
812 aliastype = 'uri'
813 # remove unescaped whitespace
814 alias_parts = split_escaped_whitespace(match.group(2))
815 alias = ' '.join(''.join(part.split())
816 for part in alias_parts)
817 alias = self.adjust_uri(unescape(alias))
818 if alias.endswith(r'\_'):
819 alias = alias[:-2] + '_'
820 target = nodes.target(match.group(1), refuri=alias)
821 target.referenced = 1
822 if not aliastext:
823 raise ApplicationError('problem with embedded link: %r'
824 % aliastext)
825 if not text:
826 text = alias
827 unescaped = unescape(text)
828 rawtext = rawaliastext
829 else:
830 text = escaped
831 unescaped = unescape(text)
832 target = None
833 rawtext = unescape(escaped, True)
834
835 refname = normalize_name(unescaped)
836 reference = nodes.reference(rawsource, text,
837 name=whitespace_normalize_name(unescaped))
838 reference[0].rawsource = rawtext
839
840 node_list = [reference]
841
842 if rawsource[-2:] == '__':
843 if target and (aliastype == 'name'):
844 reference['refname'] = alias
845 self.document.note_refname(reference)
846 # self.document.note_indirect_target(target) # required?
847 elif target and (aliastype == 'uri'):
848 reference['refuri'] = alias
849 else:
850 reference['anonymous'] = True
851 else:
852 if target:
853 target['names'].append(refname)
854 if aliastype == 'name':
855 reference['refname'] = alias
856 self.document.note_indirect_target(target)
857 self.document.note_refname(reference)
858 else:
859 reference['refuri'] = alias
860 # target.note_referenced_by(name=refname)
861 self.document.note_explicit_target(target, self.parent)
862 node_list.append(target)
863 else:
864 reference['refname'] = refname
865 self.document.note_refname(reference)
866 return before, node_list, after, []
867
868 def adjust_uri(self, uri):
869 match = self.patterns.email.match(uri)
870 if match:
871 return 'mailto:' + uri
872 else:
873 return uri
874
875 def interpreted(self, rawsource, text, role, lineno):
876 role_fn, messages = roles.role(role, self.language, lineno,
877 self.reporter)
878 if role_fn:
879 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
880 return nodes, messages + messages2
881 else:
882 msg = self.reporter.error(
883 'Unknown interpreted text role "%s".' % role,
884 line=lineno)
885 return ([self.problematic(rawsource, rawsource, msg)],
886 messages + [msg])
887
888 def literal(self, match, lineno):
889 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
890 match, lineno, self.patterns.literal, nodes.literal,
891 restore_backslashes=True)
892 return before, inlines, remaining, sysmessages
893
894 def inline_internal_target(self, match, lineno):
895 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
896 match, lineno, self.patterns.target, nodes.target)
897 if inlines and isinstance(inlines[0], nodes.target):
898 assert len(inlines) == 1
899 target = inlines[0]
900 name = normalize_name(target.astext())
901 target['names'].append(name)
902 self.document.note_explicit_target(target, self.parent)
903 return before, inlines, remaining, sysmessages
904
905 def substitution_reference(self, match, lineno):
906 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
907 match, lineno, self.patterns.substitution_ref,
908 nodes.substitution_reference)
909 if len(inlines) == 1:
910 subref_node = inlines[0]
911 if isinstance(subref_node, nodes.substitution_reference):
912 subref_text = subref_node.astext()
913 self.document.note_substitution_ref(subref_node, subref_text)
914 if endstring[-1:] == '_':
915 reference_node = nodes.reference(
916 '|%s%s' % (subref_text, endstring), '')
917 if endstring[-2:] == '__':
918 reference_node['anonymous'] = True
919 else:
920 reference_node['refname'] = normalize_name(subref_text)
921 self.document.note_refname(reference_node)
922 reference_node += subref_node
923 inlines = [reference_node]
924 return before, inlines, remaining, sysmessages
925
926 def footnote_reference(self, match, lineno):
927 """
928 Handles `nodes.footnote_reference` and `nodes.citation_reference`
929 elements.
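
        The label determines the flavour: ``[1]_`` (manually numbered),
        ``[#]_`` or ``[#label]_`` (auto-numbered), ``[*]_`` (auto-symbol),
        and ``[name]_`` (citation reference).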
930 """
931 label = match.group('footnotelabel')
932 refname = normalize_name(label)
933 string = match.string
934 before = string[:match.start('whole')]
935 remaining = string[match.end('whole'):]
936 if match.group('citationlabel'):
937 refnode = nodes.citation_reference('[%s]_' % label,
938 refname=refname)
939 refnode += nodes.Text(label)
940 self.document.note_citation_ref(refnode)
941 else:
942 refnode = nodes.footnote_reference('[%s]_' % label)
943 if refname[0] == '#':
944 refname = refname[1:]
945 refnode['auto'] = 1
946 self.document.note_autofootnote_ref(refnode)
947 elif refname == '*':
948 refname = ''
949 refnode['auto'] = '*'
950 self.document.note_symbol_footnote_ref(
951 refnode)
952 else:
953 refnode += nodes.Text(label)
954 if refname:
955 refnode['refname'] = refname
956 self.document.note_footnote_ref(refnode)
957 if utils.get_trim_footnote_ref_space(self.document.settings):
958 before = before.rstrip()
959 return before, [refnode], remaining, []
960
961 def reference(self, match, lineno, anonymous=False):
962 referencename = match.group('refname')
963 refname = normalize_name(referencename)
964 referencenode = nodes.reference(
965 referencename + match.group('refend'), referencename,
966 name=whitespace_normalize_name(referencename))
967 referencenode[0].rawsource = referencename
968 if anonymous:
969 referencenode['anonymous'] = True
970 else:
971 referencenode['refname'] = refname
972 self.document.note_refname(referencenode)
973 string = match.string
974 matchstart = match.start('whole')
975 matchend = match.end('whole')
976 return string[:matchstart], [referencenode], string[matchend:], []
977
978 def anonymous_reference(self, match, lineno):
979 return self.reference(match, lineno, anonymous=True)
980
981 def standalone_uri(self, match, lineno):
982 if (not match.group('scheme')
983 or match.group('scheme').lower() in urischemes.schemes):
984 if match.group('email'):
985 addscheme = 'mailto:'
986 else:
987 addscheme = ''
988 text = match.group('whole')
989 refuri = addscheme + unescape(text)
990 reference = nodes.reference(unescape(text, True), text,
991 refuri=refuri)
992 return [reference]
993 else: # not a valid scheme
994 raise MarkupMismatch
995
996 def pep_reference(self, match, lineno):
997 text = match.group(0)
998 if text.startswith('pep-'):
999 pepnum = int(unescape(match.group('pepnum1')))
1000 elif text.startswith('PEP'):
1001 pepnum = int(unescape(match.group('pepnum2')))
1002 else:
1003 raise MarkupMismatch
1004 ref = (self.document.settings.pep_base_url
1005 + self.document.settings.pep_file_url_template % pepnum)
1006 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1007
1008 rfc_url = 'rfc%d.html'
1009
1010 def rfc_reference(self, match, lineno):
1011 text = match.group(0)
1012 if text.startswith('RFC'):
1013 rfcnum = int(unescape(match.group('rfcnum')))
1014 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
1015 else:
1016 raise MarkupMismatch
1017 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1018
1019 def implicit_inline(self, text, lineno):
1020 """
1021 Check each of the patterns in `self.implicit_dispatch` for a match,
1022 and dispatch to the stored method for the pattern. Recursively check
1023 the text before and after the match. Return a list of `nodes.Text`
1024 and inline element nodes.
1025 """
1026 if not text:
1027 return []
1028 for pattern, method in self.implicit_dispatch:
1029 match = pattern.search(text)
1030 if match:
1031 try:
1032 # Must recurse on strings before *and* after the match;
1033 # there may be multiple patterns.
1034 return (self.implicit_inline(text[:match.start()], lineno)
1035 + method(match, lineno)
1036 + self.implicit_inline(text[match.end():], lineno))
1037 except MarkupMismatch:
1038 pass
1039 return [nodes.Text(text)]
1040
1041 dispatch = {'*': emphasis,
1042 '**': strong,
1043 '`': interpreted_or_phrase_ref,
1044 '``': literal,
1045 '_`': inline_internal_target,
1046 ']_': footnote_reference,
1047 '|': substitution_reference,
1048 '_': reference,
1049 '__': anonymous_reference}
1050
1051
1052def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1053 return ord(s) - _zero
1054
1055
1056def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1057 return ord(s) - _zero
1058
1059
1060class Body(RSTState):
1061
1062 """
1063 Generic classifier of the first line of a block.
1064 """
1065
1066 double_width_pad_char = tableparser.TableParser.double_width_pad_char
1067 """Padding character for East Asian double-width text."""
1068
1069 enum = Struct()
1070 """Enumerated list parsing information."""
1071
1072 enum.formatinfo = {
1073 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1074 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1075 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1076 enum.formats = enum.formatinfo.keys()
1077 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1078 'lowerroman', 'upperroman'] # ORDERED!
1079 enum.sequencepats = {'arabic': '[0-9]+',
1080 'loweralpha': '[a-z]',
1081 'upperalpha': '[A-Z]',
1082 'lowerroman': '[ivxlcdm]+',
1083 'upperroman': '[IVXLCDM]+'}
1084 enum.converters = {'arabic': int,
1085 'loweralpha': _loweralpha_to_int,
1086 'upperalpha': _upperalpha_to_int,
1087 'lowerroman': RomanNumeral.from_string,
1088 'upperroman': RomanNumeral.from_string}
1089
1090 enum.sequenceregexps = {}
1091 for sequence in enum.sequences:
1092 enum.sequenceregexps[sequence] = re.compile(
1093 enum.sequencepats[sequence] + '$')
1094
1095 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1096 """Matches the top (& bottom) of a full table)."""
1097
1098 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1099 """Matches the top of a simple table."""
1100
1101 simple_table_border_pat = re.compile('=+[ =]*$')
1102 """Matches the bottom & header bottom of a simple table."""
1103
1104 pats = {}
1105 """Fragments of patterns used by transitions."""
1106
1107 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1108 pats['alpha'] = '[a-zA-Z]'
1109 pats['alphanum'] = '[a-zA-Z0-9]'
1110 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1111 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1112 '|%(upperroman)s|#)' % enum.sequencepats)
1113 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1114 # @@@ Loosen up the pattern? Allow Unicode?
1115 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1116 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1117 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1118 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1119
1120 for format in enum.formats:
1121 pats[format] = '(?P<%s>%s%s%s)' % (
1122 format, re.escape(enum.formatinfo[format].prefix),
1123 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1124
1125 patterns = {
1126 'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
1127 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1128 'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
1129 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1130 'doctest': r'>>>( +|$)',
1131 'line_block': r'\|( +|$)',
1132 'grid_table_top': grid_table_top_pat,
1133 'simple_table_top': simple_table_top_pat,
1134 'explicit_markup': r'\.\.( +|$)',
1135 'anonymous': r'__( +|$)',
1136 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1137 'text': r''}
1138 initial_transitions = (
1139 'bullet',
1140 'enumerator',
1141 'field_marker',
1142 'option_marker',
1143 'doctest',
1144 'line_block',
1145 'grid_table_top',
1146 'simple_table_top',
1147 'explicit_markup',
1148 'anonymous',
1149 'line',
1150 'text')
1151
1152 def indent(self, match, context, next_state):
1153 """Block quote."""
1154 (indented, indent, line_offset, blank_finish
1155 ) = self.state_machine.get_indented()
1156 elements = self.block_quote(indented, line_offset)
1157 self.parent += elements
1158 if not blank_finish:
1159 self.parent += self.unindent_warning('Block quote')
1160 return context, next_state, []
1161
1162 def block_quote(self, indented, line_offset):
1163 elements = []
1164 while indented:
1165 blockquote = nodes.block_quote(rawsource='\n'.join(indented))
1166 (blockquote.source, blockquote.line
1167 ) = self.state_machine.get_source_and_line(line_offset+1)
1168 (blockquote_lines,
1169 attribution_lines,
1170 attribution_offset,
1171 indented,
1172 new_line_offset) = self.split_attribution(indented, line_offset)
1173 self.nested_parse(blockquote_lines, line_offset, blockquote)
1174 elements.append(blockquote)
1175 if attribution_lines:
1176 attribution, messages = self.parse_attribution(
1177 attribution_lines, line_offset+attribution_offset)
1178 blockquote += attribution
1179 elements += messages
1180 line_offset = new_line_offset
1181 while indented and not indented[0]:
1182 indented = indented[1:]
1183 line_offset += 1
1184 return elements
1185
1186 # U+2014 is an em-dash:
1187 attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
1188
1189 def split_attribution(self, indented, line_offset):
1190 """
1191 Check for a block quote attribution and split it off:
1192
1193 * First line after a blank line must begin with a dash ("--", "---",
1194 em-dash; matches `self.attribution_pattern`).
1195 * Every line after that must have consistent indentation.
1196 * Attributions must be preceded by block quote content.
1197
1198 Return a tuple of: (block quote content lines, attribution lines,
1199 attribution offset, remaining indented lines, remaining lines offset).
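
        For example, in the following block quote the last two lines are
        split off as the attribution::

            This is a quoted paragraph.

            -- An Author,
               *Some Work*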
1200 """
1201 blank = None
1202 nonblank_seen = False
1203 for i in range(len(indented)):
1204 line = indented[i].rstrip()
1205 if line:
1206 if nonblank_seen and blank == i - 1: # last line blank
1207 match = self.attribution_pattern.match(line)
1208 if match:
1209 attribution_end, indent = self.check_attribution(
1210 indented, i)
1211 if attribution_end:
1212 a_lines = indented[i:attribution_end]
1213 a_lines.trim_left(match.end(), end=1)
1214 a_lines.trim_left(indent, start=1)
1215 return (indented[:i], a_lines,
1216 i, indented[attribution_end:],
1217 line_offset + attribution_end)
1218 nonblank_seen = True
1219 else:
1220 blank = i
1221 else:
1222 return indented, None, None, None, None
1223
1224 def check_attribution(self, indented, attribution_start):
1225 """
1226 Check attribution shape.
1227 Return the index past the end of the attribution, and the indent.
1228 """
1229 indent = None
1230 i = attribution_start + 1
1231 for i in range(attribution_start + 1, len(indented)):
1232 line = indented[i].rstrip()
1233 if not line:
1234 break
1235 if indent is None:
1236 indent = len(line) - len(line.lstrip())
1237 elif len(line) - len(line.lstrip()) != indent:
1238 return None, None # bad shape; not an attribution
1239 else:
1240 # return index of line after last attribution line:
1241 i += 1
1242 return i, (indent or 0)
1243
1244 def parse_attribution(self, indented, line_offset):
1245 text = '\n'.join(indented).rstrip()
1246 lineno = 1 + line_offset # line_offset is zero-based
1247 textnodes, messages = self.inline_text(text, lineno)
1248 node = nodes.attribution(text, '', *textnodes)
1249 node.source, node.line = self.state_machine.get_source_and_line(lineno)
1250 return node, messages
1251
1252 def bullet(self, match, context, next_state):
1253 """Bullet list item."""
1254 ul = nodes.bullet_list()
1255 ul.source, ul.line = self.state_machine.get_source_and_line()
1256 self.parent += ul
1257 ul['bullet'] = match.string[0]
1258 i, blank_finish = self.list_item(match.end())
1259 ul += i
1260 offset = self.state_machine.line_offset + 1 # next line
1261 new_line_offset, blank_finish = self.nested_list_parse(
1262 self.state_machine.input_lines[offset:],
1263 input_offset=self.state_machine.abs_line_offset() + 1,
1264 node=ul, initial_state='BulletList',
1265 blank_finish=blank_finish)
1266 self.goto_line(new_line_offset)
1267 if not blank_finish:
1268 self.parent += self.unindent_warning('Bullet list')
1269 return [], next_state, []
1270
1271 def list_item(self, indent):
1272 src, srcline = self.state_machine.get_source_and_line()
1273 if self.state_machine.line[indent:]:
1274 indented, line_offset, blank_finish = (
1275 self.state_machine.get_known_indented(indent))
1276 else:
1277 indented, indent, line_offset, blank_finish = (
1278 self.state_machine.get_first_known_indented(indent))
1279 listitem = nodes.list_item('\n'.join(indented))
1280 listitem.source, listitem.line = src, srcline
1281 if indented:
1282 self.nested_parse(indented, input_offset=line_offset,
1283 node=listitem)
1284 return listitem, blank_finish
1285
1286 def enumerator(self, match, context, next_state):
1287 """Enumerated List Item"""
1288 format, sequence, text, ordinal = self.parse_enumerator(match)
1289 if not self.is_enumerated_list_item(ordinal, sequence, format):
1290 raise statemachine.TransitionCorrection('text')
1291 enumlist = nodes.enumerated_list()
1292 (enumlist.source,
1293 enumlist.line) = self.state_machine.get_source_and_line()
1294 self.parent += enumlist
1295 if sequence == '#':
1296 enumlist['enumtype'] = 'arabic'
1297 else:
1298 enumlist['enumtype'] = sequence
1299 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1300 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1301 if ordinal != 1:
1302 enumlist['start'] = ordinal
1303 msg = self.reporter.info(
1304 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1305 % (text, ordinal), base_node=enumlist)
1306 self.parent += msg
1307 listitem, blank_finish = self.list_item(match.end())
1308 enumlist += listitem
1309 offset = self.state_machine.line_offset + 1 # next line
1310 newline_offset, blank_finish = self.nested_list_parse(
1311 self.state_machine.input_lines[offset:],
1312 input_offset=self.state_machine.abs_line_offset() + 1,
1313 node=enumlist, initial_state='EnumeratedList',
1314 blank_finish=blank_finish,
1315 extra_settings={'lastordinal': ordinal,
1316 'format': format,
1317 'auto': sequence == '#'})
1318 self.goto_line(newline_offset)
1319 if not blank_finish:
1320 self.parent += self.unindent_warning('Enumerated list')
1321 return [], next_state, []
1322
1323 def parse_enumerator(self, match, expected_sequence=None):
1324 """
1325 Analyze an enumerator and return the results.
1326
1327 :Return:
1328 - the enumerator format ('period', 'parens', or 'rparen'),
1329 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1330 - the text of the enumerator, stripped of formatting, and
1331 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1332 ``None`` is returned for invalid enumerator text).
1333
1334 The enumerator format has already been determined by the regular
1335 expression match. If `expected_sequence` is given, that sequence is
1336 tried first. If not, we check for Roman numeral 1. This way,
1337 single-character Roman numerals (which are also alphabetical) can be
1338 matched. If no sequence has been matched, all sequences are checked in
1339 order.
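
        For example, a match for ``"(c) "`` yields
        ``('parens', 'loweralpha', 'c', 3)``, and a match for ``"iv) "``
        yields ``('rparen', 'lowerroman', 'iv', 4)``.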
1340 """
1341 groupdict = match.groupdict()
1342 sequence = ''
1343 for format in self.enum.formats:
1344 if groupdict[format]: # was this the format matched?
1345 break # yes; keep `format`
1346 else: # shouldn't happen
1347 raise ParserError('enumerator format not matched')
1348 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1349 : self.enum.formatinfo[format].end]
1350 if text == '#':
1351 sequence = '#'
1352 elif expected_sequence:
1353 try:
1354 if self.enum.sequenceregexps[expected_sequence].match(text):
1355 sequence = expected_sequence
1356 except KeyError: # shouldn't happen
1357 raise ParserError('unknown enumerator sequence: %s'
1358 % sequence)
1359 elif text == 'i':
1360 sequence = 'lowerroman'
1361 elif text == 'I':
1362 sequence = 'upperroman'
1363 if not sequence:
1364 for sequence in self.enum.sequences:
1365 if self.enum.sequenceregexps[sequence].match(text):
1366 break
1367 else: # shouldn't happen
1368 raise ParserError('enumerator sequence not matched')
1369 if sequence == '#':
1370 ordinal = 1
1371 else:
1372 try:
1373 ordinal = int(self.enum.converters[sequence](text))
1374 except InvalidRomanNumeralError:
1375 ordinal = None
1376 return format, sequence, text, ordinal
1377
1378 def is_enumerated_list_item(self, ordinal, sequence, format):
1379 """
1380 Check validity based on the ordinal value and the second line.
1381
1382 Return true if the ordinal is valid and the second line is blank,
1383 indented, or starts with the next enumerator or an auto-enumerator.
1384 """
1385 if ordinal is None:
1386 return None
1387 try:
1388 next_line = self.state_machine.next_line()
1389 except EOFError: # end of input lines
1390 self.state_machine.previous_line()
1391 return 1
1392 else:
1393 self.state_machine.previous_line()
1394 if not next_line[:1].strip(): # blank or indented
1395 return 1
1396 result = self.make_enumerator(ordinal + 1, sequence, format)
1397 if result:
1398 next_enumerator, auto_enumerator = result
1399 try:
1400 if next_line.startswith((next_enumerator, auto_enumerator)):
1401 return 1
1402 except TypeError:
1403 pass
1404 return None
1405
1406 def make_enumerator(self, ordinal, sequence, format):
1407 """
1408 Construct and return the next enumerated list item marker, and an
1409 auto-enumerator ("#" instead of the regular enumerator).
1410
1411 Return ``None`` for invalid (out of range) ordinals.
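
        For example, ``make_enumerator(3, 'loweralpha', 'parens')`` returns
        ``('(c) ', '(#) ')`` and ``make_enumerator(4, 'upperroman', 'period')``
        returns ``('IV. ', '#. ')``.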
1412 """
1413 if sequence == '#':
1414 enumerator = '#'
1415 elif sequence == 'arabic':
1416 enumerator = str(ordinal)
1417 else:
1418 if sequence.endswith('alpha'):
1419 if ordinal > 26:
1420 return None
1421 enumerator = chr(ordinal + ord('a') - 1)
1422 elif sequence.endswith('roman'):
1423 try:
1424 enumerator = RomanNumeral(ordinal).to_uppercase()
1425 except TypeError:
1426 return None
1427 else: # shouldn't happen
1428 raise ParserError('unknown enumerator sequence: "%s"'
1429 % sequence)
1430 if sequence.startswith('lower'):
1431 enumerator = enumerator.lower()
1432 elif sequence.startswith('upper'):
1433 enumerator = enumerator.upper()
1434 else: # shouldn't happen
1435 raise ParserError('unknown enumerator sequence: "%s"'
1436 % sequence)
1437 formatinfo = self.enum.formatinfo[format]
1438 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1439 + ' ')
1440 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1441 return next_enumerator, auto_enumerator
1442
1443 def field_marker(self, match, context, next_state):
1444 """Field list item."""
1445 field_list = nodes.field_list()
1446 self.parent += field_list
1447 field, blank_finish = self.field(match)
1448 field_list += field
1449 offset = self.state_machine.line_offset + 1 # next line
1450 newline_offset, blank_finish = self.nested_list_parse(
1451 self.state_machine.input_lines[offset:],
1452 input_offset=self.state_machine.abs_line_offset() + 1,
1453 node=field_list, initial_state='FieldList',
1454 blank_finish=blank_finish)
1455 self.goto_line(newline_offset)
1456 if not blank_finish:
1457 self.parent += self.unindent_warning('Field list')
1458 return [], next_state, []
1459
1460 def field(self, match):
1461 name = self.parse_field_marker(match)
1462 src, srcline = self.state_machine.get_source_and_line()
1463 lineno = self.state_machine.abs_line_number()
1464 (indented, indent, line_offset, blank_finish
1465 ) = self.state_machine.get_first_known_indented(match.end())
1466 field_node = nodes.field()
1467 field_node.source = src
1468 field_node.line = srcline
1469 name_nodes, name_messages = self.inline_text(name, lineno)
1470 field_node += nodes.field_name(name, '', *name_nodes)
1471 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1472 field_node += field_body
1473 if indented:
1474 self.parse_field_body(indented, line_offset, field_body)
1475 return field_node, blank_finish
1476
1477 def parse_field_marker(self, match):
1478 """Extract & return field name from a field marker match."""
1479 field = match.group()[1:] # strip off leading ':'
1480 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1481 return field
1482
1483 def parse_field_body(self, indented, offset, node) -> None:
1484 self.nested_parse(indented, input_offset=offset, node=node)
1485
1486 def option_marker(self, match, context, next_state):
1487 """Option list item."""
1488 optionlist = nodes.option_list()
1489 (optionlist.source, optionlist.line
1490 ) = self.state_machine.get_source_and_line()
1491 try:
1492 listitem, blank_finish = self.option_list_item(match)
1493 except MarkupError as error:
1494 # This shouldn't happen; pattern won't match.
1495 msg = self.reporter.error('Invalid option list marker: %s'
1496 % error)
1497 self.parent += msg
1498 (indented, indent, line_offset, blank_finish
1499 ) = self.state_machine.get_first_known_indented(match.end())
1500 elements = self.block_quote(indented, line_offset)
1501 self.parent += elements
1502 if not blank_finish:
1503 self.parent += self.unindent_warning('Option list')
1504 return [], next_state, []
1505 self.parent += optionlist
1506 optionlist += listitem
1507 offset = self.state_machine.line_offset + 1 # next line
1508 newline_offset, blank_finish = self.nested_list_parse(
1509 self.state_machine.input_lines[offset:],
1510 input_offset=self.state_machine.abs_line_offset() + 1,
1511 node=optionlist, initial_state='OptionList',
1512 blank_finish=blank_finish)
1513 self.goto_line(newline_offset)
1514 if not blank_finish:
1515 self.parent += self.unindent_warning('Option list')
1516 return [], next_state, []
1517
1518 def option_list_item(self, match):
1519 offset = self.state_machine.abs_line_offset()
1520 options = self.parse_option_marker(match)
1521 (indented, indent, line_offset, blank_finish
1522 ) = self.state_machine.get_first_known_indented(match.end())
1523 if not indented: # not an option list item
1524 self.goto_line(offset)
1525 raise statemachine.TransitionCorrection('text')
1526 option_group = nodes.option_group('', *options)
1527 description = nodes.description('\n'.join(indented))
1528 option_list_item = nodes.option_list_item('', option_group,
1529 description)
1530 if indented:
1531 self.nested_parse(indented, input_offset=line_offset,
1532 node=description)
1533 return option_list_item, blank_finish
1534
1535 def parse_option_marker(self, match):
1536 """
        Return a list of `nodes.option` and `nodes.option_argument` objects,
1538 parsed from an option marker match.
1539
1540 :Exception: `MarkupError` for invalid option markers.
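
        For example, the marker ``"-f FILE, --file=FILE"`` produces two
        `nodes.option` elements, each containing an `option_string` and an
        `option_argument` (with argument delimiters ``' '`` and ``'='``
        respectively).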
1541 """
1542 optlist = []
1543 # split at ", ", except inside < > (complex arguments)
1544 optionstrings = re.split(r', (?![^<]*>)', match.group().rstrip())
1545 for optionstring in optionstrings:
1546 tokens = optionstring.split()
1547 delimiter = ' '
1548 firstopt = tokens[0].split('=', 1)
1549 if len(firstopt) > 1:
1550 # "--opt=value" form
1551 tokens[:1] = firstopt
1552 delimiter = '='
1553 elif (len(tokens[0]) > 2
1554 and ((tokens[0].startswith('-')
1555 and not tokens[0].startswith('--'))
1556 or tokens[0].startswith('+'))):
1557 # "-ovalue" form
1558 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1559 delimiter = ''
1560 if len(tokens) > 1 and (tokens[1].startswith('<')
1561 and tokens[-1].endswith('>')):
1562 # "-o <value1 value2>" form; join all values into one token
1563 tokens[1:] = [' '.join(tokens[1:])]
1564 if 0 < len(tokens) <= 2:
1565 option = nodes.option(optionstring)
1566 option += nodes.option_string(tokens[0], tokens[0])
1567 if len(tokens) > 1:
1568 option += nodes.option_argument(tokens[1], tokens[1],
1569 delimiter=delimiter)
1570 optlist.append(option)
1571 else:
1572 raise MarkupError(
1573 'wrong number of option tokens (=%s), should be 1 or 2: '
1574 '"%s"' % (len(tokens), optionstring))
1575 return optlist
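    # Rough illustration of the option-marker forms handled above
    # (not exhaustive):
    #
    #   "-f FILE, --file=FILE"  ->  two option groups:
    #       option_string "-f",     option_argument "FILE" (delimiter " ")
    #       option_string "--file", option_argument "FILE" (delimiter "=")
    #   "-ofile"                ->  option_string "-o",
    #                               option_argument "file" (delimiter "")
    #   "-o <value1 value2>"    ->  the bracketed words are joined into a
    #                               single option_argument token.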
1576
1577 def doctest(self, match, context, next_state):
1578 line = self.document.current_line
1579 data = '\n'.join(self.state_machine.get_text_block())
1580 # TODO: Parse with `directives.body.CodeBlock` with
1581 # argument 'pycon' (Python Console) in Docutils 1.0.
1582 n = nodes.doctest_block(data, data)
1583 n.line = line
1584 self.parent += n
1585 return [], next_state, []
1586
1587 def line_block(self, match, context, next_state):
1588 """First line of a line block."""
1589 block = nodes.line_block()
1590 self.parent += block
1591 lineno = self.state_machine.abs_line_number()
1592 (block.source,
1593 block.line) = self.state_machine.get_source_and_line(lineno)
1594 line, messages, blank_finish = self.line_block_line(match, lineno)
1595 block += line
1596 self.parent += messages
1597 if not blank_finish:
1598 offset = self.state_machine.line_offset + 1 # next line
1599 new_line_offset, blank_finish = self.nested_list_parse(
1600 self.state_machine.input_lines[offset:],
1601 input_offset=self.state_machine.abs_line_offset() + 1,
1602 node=block, initial_state='LineBlock',
1603 blank_finish=0)
1604 self.goto_line(new_line_offset)
1605 if not blank_finish:
1606 self.parent += self.reporter.warning(
1607 'Line block ends without a blank line.',
1608 line=lineno+1)
1609 if len(block):
1610 if block[0].indent is None:
1611 block[0].indent = 0
1612 self.nest_line_block_lines(block)
1613 return [], next_state, []
1614
1615 def line_block_line(self, match, lineno):
1616 """Return one line element of a line_block."""
1617 (indented, indent, line_offset, blank_finish
1618 ) = self.state_machine.get_first_known_indented(match.end(),
1619 until_blank=True)
1620 text = '\n'.join(indented)
1621 text_nodes, messages = self.inline_text(text, lineno)
1622 line = nodes.line(text, '', *text_nodes)
1623 (line.source,
1624 line.line) = self.state_machine.get_source_and_line(lineno)
1625 if match.string.rstrip() != '|': # not empty
1626 line.indent = len(match.group(1)) - 1
1627 return line, messages, blank_finish
1628
1629 def nest_line_block_lines(self, block) -> None:
1630 for index in range(1, len(block)):
1631 if block[index].indent is None:
1632 block[index].indent = block[index - 1].indent
1633 self.nest_line_block_segment(block)
1634
1635 def nest_line_block_segment(self, block) -> None:
1636 indents = [item.indent for item in block]
1637 least = min(indents)
1638 new_items = []
1639 new_block = nodes.line_block()
1640 for item in block:
1641 if item.indent > least:
1642 new_block.append(item)
1643 else:
1644 if len(new_block):
1645 self.nest_line_block_segment(new_block)
1646 new_items.append(new_block)
1647 new_block = nodes.line_block()
1648 new_items.append(item)
1649 if len(new_block):
1650 self.nest_line_block_segment(new_block)
1651 new_items.append(new_block)
1652 block[:] = new_items
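    # Worked example (illustrative): a line block such as
    #
    #   | A one
    #   |     A two
    #   |     A three
    #   | B one
    #
    # produces line elements with indents [0, 4, 4, 0] (one less than the
    # number of spaces after the "|"); the two more-indented lines are
    # wrapped in a nested line_block between the outer lines.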
1653
1654 def grid_table_top(self, match, context, next_state):
1655 """Top border of a full table."""
1656 return self.table_top(match, context, next_state,
1657 self.isolate_grid_table,
1658 tableparser.GridTableParser)
1659
1660 def simple_table_top(self, match, context, next_state):
1661 """Top border of a simple table."""
1662 return self.table_top(match, context, next_state,
1663 self.isolate_simple_table,
1664 tableparser.SimpleTableParser)
1665
1666 def table_top(self, match, context, next_state,
1667 isolate_function, parser_class):
1668 """Top border of a generic table."""
1669 nodelist, blank_finish = self.table(isolate_function, parser_class)
1670 self.parent += nodelist
1671 if not blank_finish:
1672 msg = self.reporter.warning(
1673 'Blank line required after table.',
1674 line=self.state_machine.abs_line_number()+1)
1675 self.parent += msg
1676 return [], next_state, []
1677
1678 def table(self, isolate_function, parser_class):
1679 """Parse a table."""
1680 block, messages, blank_finish = isolate_function()
1681 if block:
1682 try:
1683 parser = parser_class()
1684 tabledata = parser.parse(block)
1685 tableline = (self.state_machine.abs_line_number() - len(block)
1686 + 1)
1687 table = self.build_table(tabledata, tableline)
1688 nodelist = [table] + messages
1689 except tableparser.TableMarkupError as err:
1690 nodelist = self.malformed_table(block, ' '.join(err.args),
1691 offset=err.offset) + messages
1692 else:
1693 nodelist = messages
1694 return nodelist, blank_finish
1695
1696 def isolate_grid_table(self):
1697 messages = []
1698 blank_finish = 1
1699 try:
1700 block = self.state_machine.get_text_block(flush_left=True)
1701 except statemachine.UnexpectedIndentationError as err:
1702 block, src, srcline = err.args
1703 messages.append(self.reporter.error('Unexpected indentation.',
1704 source=src, line=srcline))
1705 blank_finish = 0
1706 block.disconnect()
1707 # for East Asian chars:
1708 block.pad_double_width(self.double_width_pad_char)
1709 width = len(block[0].strip())
1710 for i in range(len(block)):
1711 block[i] = block[i].strip()
1712 if block[i][0] not in '+|': # check left edge
1713 blank_finish = 0
1714 self.state_machine.previous_line(len(block) - i)
1715 del block[i:]
1716 break
1717 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1718 blank_finish = 0
1719 # from second-last to third line of table:
1720 for i in range(len(block) - 2, 1, -1):
1721 if self.grid_table_top_pat.match(block[i]):
1722 self.state_machine.previous_line(len(block) - i + 1)
1723 del block[i+1:]
1724 break
1725 else:
1726 messages.extend(self.malformed_table(block))
1727 return [], messages, blank_finish
1728 for i in range(len(block)): # check right edge
1729 if len(block[i]) != width or block[i][-1] not in '+|':
1730 messages.extend(self.malformed_table(block))
1731 return [], messages, blank_finish
1732 return block, messages, blank_finish
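    # Illustrative input accepted by `isolate_grid_table` (a minimal grid
    # table):
    #
    #   +-------+-------+
    #   | cell  | cell  |
    #   +-------+-------+
    #
    # Every line must start and end with "+" or "|" and match the width of
    # the top border; if the final line is not a border line, the block is
    # cut back to the last line matching the top-border pattern.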
1733
1734 def isolate_simple_table(self):
1735 start = self.state_machine.line_offset
1736 lines = self.state_machine.input_lines
1737 limit = len(lines) - 1
1738 toplen = len(lines[start].strip())
1739 pattern_match = self.simple_table_border_pat.match
1740 found = 0
1741 found_at = None
1742 i = start + 1
1743 while i <= limit:
1744 line = lines[i]
1745 match = pattern_match(line)
1746 if match:
1747 if len(line.strip()) != toplen:
1748 self.state_machine.next_line(i - start)
1749 messages = self.malformed_table(
1750 lines[start:i+1], 'Bottom/header table border does '
1751 'not match top border.')
1752 return [], messages, i == limit or not lines[i+1].strip()
1753 found += 1
1754 found_at = i
1755 if found == 2 or i == limit or not lines[i+1].strip():
1756 end = i
1757 break
1758 i += 1
1759 else: # reached end of input_lines
1760 if found:
1761 extra = ' or no blank line after table bottom'
1762 self.state_machine.next_line(found_at - start)
1763 block = lines[start:found_at+1]
1764 else:
1765 extra = ''
1766 self.state_machine.next_line(i - start - 1)
1767 block = lines[start:]
1768 messages = self.malformed_table(
1769 block, 'No bottom table border found%s.' % extra)
1770 return [], messages, not extra
1771 self.state_machine.next_line(end - start)
1772 block = lines[start:end+1]
1773 # for East Asian chars:
1774 block.pad_double_width(self.double_width_pad_char)
1775 return block, [], end == limit or not lines[end+1].strip()
1776
1777 def malformed_table(self, block, detail='', offset=0):
1778 block.replace(self.double_width_pad_char, '')
1779 data = '\n'.join(block)
1780 message = 'Malformed table.'
1781 startline = self.state_machine.abs_line_number() - len(block) + 1
1782 if detail:
1783 message += '\n' + detail
1784 error = self.reporter.error(message, nodes.literal_block(data, data),
1785 line=startline+offset)
1786 return [error]
1787
1788 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
1789 colwidths, headrows, bodyrows = tabledata
1790 table = nodes.table()
1791 if widths == 'auto':
1792 table['classes'] += ['colwidths-auto']
1793 elif widths: # "grid" or list of integers
1794 table['classes'] += ['colwidths-given']
1795 tgroup = nodes.tgroup(cols=len(colwidths))
1796 table += tgroup
1797 for colwidth in colwidths:
1798 colspec = nodes.colspec(colwidth=colwidth)
1799 if stub_columns:
1800 colspec.attributes['stub'] = True
1801 stub_columns -= 1
1802 tgroup += colspec
1803 if headrows:
1804 thead = nodes.thead()
1805 tgroup += thead
1806 for row in headrows:
1807 thead += self.build_table_row(row, tableline)
1808 tbody = nodes.tbody()
1809 tgroup += tbody
1810 for row in bodyrows:
1811 tbody += self.build_table_row(row, tableline)
1812 return table
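    # Sketch of the node structure returned by `build_table`:
    #
    #   <table>
    #       <tgroup cols="...">
    #           <colspec colwidth="...">  (one per column)
    #           <thead>                   (only if there are header rows)
    #               <row> ...
    #           <tbody>
    #               <row>
    #                   <entry>           (cell contents parsed as body
    #                                      elements, see build_table_row)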
1813
1814 def build_table_row(self, rowdata, tableline):
1815 row = nodes.row()
1816 for cell in rowdata:
1817 if cell is None:
1818 continue
1819 morerows, morecols, offset, cellblock = cell
1820 attributes = {}
1821 if morerows:
1822 attributes['morerows'] = morerows
1823 if morecols:
1824 attributes['morecols'] = morecols
1825 entry = nodes.entry(**attributes)
1826 row += entry
1827 if ''.join(cellblock):
1828 self.nested_parse(cellblock, input_offset=tableline+offset,
1829 node=entry)
1830 return row
1831
1832 explicit = Struct()
1833 """Patterns and constants used for explicit markup recognition."""
1834
1835 explicit.patterns = Struct(
1836 target=re.compile(r"""
1837 (
1838 _ # anonymous target
1839 | # *OR*
1840 (?!_) # no underscore at the beginning
1841 (?P<quote>`?) # optional open quote
1842 (?![ `]) # first char. not space or
1843 # backquote
1844 (?P<name> # reference name
1845 .+?
1846 )
1847 %(non_whitespace_escape_before)s
1848 (?P=quote) # close quote if open quote used
1849 )
1850 (?<!(?<!\x00):) # no unescaped colon at end
1851 %(non_whitespace_escape_before)s
1852 [ ]? # optional space
1853 : # end of reference name
1854 ([ ]+|$) # followed by whitespace
1855 """ % vars(Inliner), re.VERBOSE),
1856 reference=re.compile(r"""
1857 (
1858 (?P<simple>%(simplename)s)_
1859 | # *OR*
1860 ` # open backquote
1861 (?![ ]) # not space
1862 (?P<phrase>.+?) # hyperlink phrase
1863 %(non_whitespace_escape_before)s
1864 `_ # close backquote,
1865 # reference mark
1866 )
1867 $ # end of string
1868 """ % vars(Inliner), re.VERBOSE),
1869 substitution=re.compile(r"""
1870 (
1871 (?![ ]) # first char. not space
1872 (?P<name>.+?) # substitution text
1873 %(non_whitespace_escape_before)s
1874 \| # close delimiter
1875 )
1876 ([ ]+|$) # followed by whitespace
1877 """ % vars(Inliner),
1878 re.VERBOSE),)
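    # Illustrative matches for the patterns above (applied to the escaped
    # text that follows the explicit markup start):
    #
    #   target:        "_:", "name:", "`a phrase name`:"
    #   reference:     "name_", "`a phrase reference`_"
    #   substitution:  "text|"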
1879
1880 def footnote(self, match):
1881 src, srcline = self.state_machine.get_source_and_line()
1882 (indented, indent, offset, blank_finish
1883 ) = self.state_machine.get_first_known_indented(match.end())
1884 label = match.group(1)
1885 name = normalize_name(label)
1886 footnote = nodes.footnote('\n'.join(indented))
1887 footnote.source = src
1888 footnote.line = srcline
1889 if name[0] == '#': # auto-numbered
1890 name = name[1:] # autonumber label
1891 footnote['auto'] = 1
1892 if name:
1893 footnote['names'].append(name)
1894 self.document.note_autofootnote(footnote)
1895 elif name == '*': # auto-symbol
1896 name = ''
1897 footnote['auto'] = '*'
1898 self.document.note_symbol_footnote(footnote)
1899 else: # manually numbered
1900 footnote += nodes.label('', label)
1901 footnote['names'].append(name)
1902 self.document.note_footnote(footnote)
1903 if name:
1904 self.document.note_explicit_target(footnote, footnote)
1905 else:
1906 self.document.set_id(footnote, footnote)
1907 if indented:
1908 self.nested_parse(indented, input_offset=offset, node=footnote)
1909 else:
1910 footnote += self.reporter.warning('Footnote content expected.')
1911 return [footnote], blank_finish
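    # Footnote label forms recognized here (illustrative):
    #
    #   .. [1]       manually numbered
    #   .. [#]       auto-numbered
    #   .. [#label]  auto-numbered, with a reference name
    #   .. [*]       auto-symbol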
1912
1913 def citation(self, match):
1914 src, srcline = self.state_machine.get_source_and_line()
1915 (indented, indent, offset, blank_finish
1916 ) = self.state_machine.get_first_known_indented(match.end())
1917 label = match.group(1)
1918 name = normalize_name(label)
1919 citation = nodes.citation('\n'.join(indented))
1920 citation.source = src
1921 citation.line = srcline
1922 citation += nodes.label('', label)
1923 citation['names'].append(name)
1924 self.document.note_citation(citation)
1925 self.document.note_explicit_target(citation, citation)
1926 if indented:
1927 self.nested_parse(indented, input_offset=offset, node=citation)
1928 else:
1929 citation += self.reporter.warning('Citation content expected.')
1930 return [citation], blank_finish
1931
1932 def hyperlink_target(self, match):
1933 pattern = self.explicit.patterns.target
1934 lineno = self.state_machine.abs_line_number()
1935 (block, indent, offset, blank_finish
1936 ) = self.state_machine.get_first_known_indented(
1937 match.end(), until_blank=True, strip_indent=False)
1938 blocktext = match.string[:match.end()] + '\n'.join(block)
1939 block = [escape2null(line) for line in block]
1940 escaped = block[0]
1941 blockindex = 0
1942 while True:
1943 targetmatch = pattern.match(escaped)
1944 if targetmatch:
1945 break
1946 blockindex += 1
1947 try:
1948 escaped += block[blockindex]
1949 except IndexError:
1950 raise MarkupError('malformed hyperlink target.')
1951 del block[:blockindex]
1952 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
1953 target = self.make_target(block, blocktext, lineno,
1954 targetmatch.group('name'))
1955 return [target], blank_finish
1956
1957 def make_target(self, block, block_text, lineno, target_name):
1958 target_type, data = self.parse_target(block, block_text, lineno)
1959 if target_type == 'refname':
1960 target = nodes.target(block_text, '', refname=normalize_name(data))
1961 target.indirect_reference_name = data
1962 self.add_target(target_name, '', target, lineno)
1963 self.document.note_indirect_target(target)
1964 return target
1965 elif target_type == 'refuri':
1966 target = nodes.target(block_text, '')
1967 self.add_target(target_name, data, target, lineno)
1968 return target
1969 else:
1970 return data
1971
1972 def parse_target(self, block, block_text, lineno):
1973 """
1974 Determine the type of reference of a target.
1975
1976 :Return: A 2-tuple, one of:
1977
1978 - 'refname' and the indirect reference name
1979 - 'refuri' and the URI
1980 - 'malformed' and a system_message node
1981 """
1982 if block and block[-1].strip()[-1:] == '_': # possible indirect target
1983 reference = ' '.join(line.strip() for line in block)
1984 refname = self.is_reference(reference)
1985 if refname:
1986 return 'refname', refname
1987 ref_parts = split_escaped_whitespace(' '.join(block))
1988 reference = ' '.join(''.join(unescape(part).split())
1989 for part in ref_parts)
1990 return 'refuri', reference
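    # Illustrative results of `parse_target` (sketch):
    #
    #   .. _name: other-target_        ->  ('refname', 'other-target')
    #   .. _name: https://example.org  ->  ('refuri', 'https://example.org')
    #
    # A URI wrapped over several lines is rejoined with internal whitespace
    # removed; whitespace escaped with a backslash survives as a space.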
1991
1992 def is_reference(self, reference):
1993 match = self.explicit.patterns.reference.match(
1994 whitespace_normalize_name(reference))
1995 if not match:
1996 return None
1997 return unescape(match.group('simple') or match.group('phrase'))
1998
1999 def add_target(self, targetname, refuri, target, lineno):
2000 target.line = lineno
2001 if targetname:
2002 name = normalize_name(unescape(targetname))
2003 target['names'].append(name)
2004 if refuri:
2005 uri = self.inliner.adjust_uri(refuri)
2006 if uri:
2007 target['refuri'] = uri
2008 else:
2009 raise ApplicationError('problem with URI: %r' % refuri)
2010 self.document.note_explicit_target(target, self.parent)
2011 else: # anonymous target
2012 if refuri:
2013 target['refuri'] = refuri
2014 target['anonymous'] = True
2015 self.document.note_anonymous_target(target)
2016
2017 def substitution_def(self, match):
2018 pattern = self.explicit.patterns.substitution
2019 src, srcline = self.state_machine.get_source_and_line()
2020 (block, indent, offset, blank_finish
2021 ) = self.state_machine.get_first_known_indented(match.end(),
2022 strip_indent=False)
2023 blocktext = (match.string[:match.end()] + '\n'.join(block))
2024 block.disconnect()
2025 escaped = escape2null(block[0].rstrip())
2026 blockindex = 0
2027 while True:
2028 subdefmatch = pattern.match(escaped)
2029 if subdefmatch:
2030 break
2031 blockindex += 1
2032 try:
2033 escaped = escaped + ' ' + escape2null(
2034 block[blockindex].strip())
2035 except IndexError:
2036 raise MarkupError('malformed substitution definition.')
2037 del block[:blockindex] # strip out the substitution marker
2038 start = subdefmatch.end()-len(escaped)-1
2039 block[0] = (block[0].strip() + ' ')[start:-1]
2040 if not block[0]:
2041 del block[0]
2042 offset += 1
2043 while block and not block[-1].strip():
2044 block.pop()
2045 subname = subdefmatch.group('name')
2046 substitution_node = nodes.substitution_definition(blocktext)
2047 substitution_node.source = src
2048 substitution_node.line = srcline
2049 if not block:
2050 msg = self.reporter.warning(
2051 'Substitution definition "%s" missing contents.' % subname,
2052 nodes.literal_block(blocktext, blocktext),
2053 source=src, line=srcline)
2054 return [msg], blank_finish
2055 block[0] = block[0].strip()
2056 substitution_node['names'].append(
2057 nodes.whitespace_normalize_name(subname))
2058 new_abs_offset, blank_finish = self.nested_list_parse(
2059 block, input_offset=offset, node=substitution_node,
2060 initial_state='SubstitutionDef', blank_finish=blank_finish)
2061 i = 0
2062 for node in substitution_node[:]:
2063 if not (isinstance(node, nodes.Inline)
2064 or isinstance(node, nodes.Text)):
2065 self.parent += substitution_node[i]
2066 del substitution_node[i]
2067 else:
2068 i += 1
2069 for node in substitution_node.findall(nodes.Element):
2070 if self.disallowed_inside_substitution_definitions(node):
2071 pformat = nodes.literal_block('', node.pformat().rstrip())
2072 msg = self.reporter.error(
2073 'Substitution definition contains illegal element <%s>:'
2074 % node.tagname,
2075 pformat, nodes.literal_block(blocktext, blocktext),
2076 source=src, line=srcline)
2077 return [msg], blank_finish
2078 if len(substitution_node) == 0:
2079 msg = self.reporter.warning(
2080 'Substitution definition "%s" empty or invalid.' % subname,
2081 nodes.literal_block(blocktext, blocktext),
2082 source=src, line=srcline)
2083 return [msg], blank_finish
2084 self.document.note_substitution_def(
2085 substitution_node, subname, self.parent)
2086 return [substitution_node], blank_finish
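    # Illustrative substitution definition (sketch):
    #
    #   .. |logo| image:: logo.png
    #
    # yields a substitution_definition node named "logo" whose child is the
    # node produced by the embedded directive; non-inline children are moved
    # out to the parent, and the checks above reject empty or otherwise
    # invalid definitions.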
2087
2088 def disallowed_inside_substitution_definitions(self, node) -> bool:
2089 if (node['ids']
2090 or isinstance(node, nodes.reference) and node.get('anonymous')
2091 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2092 return True
2093 else:
2094 return False
2095
2096 def directive(self, match, **option_presets):
2097 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2098 type_name = match.group(1)
2099 directive_class, messages = directives.directive(
2100 type_name, self.memo.language, self.document)
2101 self.parent += messages
2102 if directive_class:
2103 return self.run_directive(
2104 directive_class, match, type_name, option_presets)
2105 else:
2106 return self.unknown_directive(type_name)
2107
2108 def run_directive(self, directive, match, type_name, option_presets):
2109 """
2110 Parse a directive then run its directive function.
2111
2112 Parameters:
2113
2114 - `directive`: The class implementing the directive. Must be
2115 a subclass of `rst.Directive`.
2116
2117 - `match`: A regular expression match object which matched the first
2118 line of the directive.
2119
2120 - `type_name`: The directive name, as used in the source text.
2121
2122 - `option_presets`: A dictionary of preset options, defaults for the
2123 directive options. Currently, only an "alt" option is passed by
2124 substitution definitions (value: the substitution name), which may
2125 be used by an embedded image directive.
2126
2127 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2128 """
2129 if isinstance(directive, (FunctionType, MethodType)):
2130 from docutils.parsers.rst import convert_directive_function
2131 directive = convert_directive_function(directive)
2132 lineno = self.state_machine.abs_line_number()
2133 initial_line_offset = self.state_machine.line_offset
2134 (indented, indent, line_offset, blank_finish
2135 ) = self.state_machine.get_first_known_indented(match.end(),
2136 strip_top=0)
2137 block_text = '\n'.join(self.state_machine.input_lines[
2138 initial_line_offset : self.state_machine.line_offset + 1]) # noqa: E203,E501
2139 try:
2140 arguments, options, content, content_offset = (
2141 self.parse_directive_block(indented, line_offset,
2142 directive, option_presets))
2143 except MarkupError as detail:
2144 error = self.reporter.error(
2145 'Error in "%s" directive:\n%s.' % (type_name,
2146 ' '.join(detail.args)),
2147 nodes.literal_block(block_text, block_text), line=lineno)
2148 return [error], blank_finish
2149 directive_instance = directive(
2150 type_name, arguments, options, content, lineno,
2151 content_offset, block_text, self, self.state_machine)
2152 try:
2153 result = directive_instance.run()
2154 except docutils.parsers.rst.DirectiveError as error:
2155 msg_node = self.reporter.system_message(error.level, error.msg,
2156 line=lineno)
2157 msg_node += nodes.literal_block(block_text, block_text)
2158 result = [msg_node]
2159 assert isinstance(result, list), \
2160 'Directive "%s" must return a list of nodes.' % type_name
2161 for i in range(len(result)):
2162 assert isinstance(result[i], nodes.Node), \
2163 ('Directive "%s" returned non-Node object (index %s): %r'
2164 % (type_name, i, result[i]))
2165 return (result,
2166 blank_finish or self.state_machine.is_next_line_blank())
2167
2168 def parse_directive_block(self, indented, line_offset, directive,
2169 option_presets):
2170 option_spec = directive.option_spec
2171 has_content = directive.has_content
2172 if indented and not indented[0].strip():
2173 indented.trim_start()
2174 line_offset += 1
2175 while indented and not indented[-1].strip():
2176 indented.trim_end()
2177 if indented and (directive.required_arguments
2178 or directive.optional_arguments
2179 or option_spec):
2180 for i, line in enumerate(indented):
2181 if not line.strip():
2182 break
2183 else:
2184 i += 1
2185 arg_block = indented[:i]
2186 content = indented[i+1:]
2187 content_offset = line_offset + i + 1
2188 else:
2189 content = indented
2190 content_offset = line_offset
2191 arg_block = []
2192 if option_spec:
2193 options, arg_block = self.parse_directive_options(
2194 option_presets, option_spec, arg_block)
2195 else:
2196 options = {}
2197 if arg_block and not (directive.required_arguments
2198 or directive.optional_arguments):
2199 content = arg_block + indented[i:]
2200 content_offset = line_offset
2201 arg_block = []
2202 while content and not content[0].strip():
2203 content.trim_start()
2204 content_offset += 1
2205 if directive.required_arguments or directive.optional_arguments:
2206 arguments = self.parse_directive_arguments(
2207 directive, arg_block)
2208 else:
2209 arguments = []
2210 if content and not has_content:
2211 raise MarkupError('no content permitted')
2212 return arguments, options, content, content_offset
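    # Worked example (illustrative, roughly): for a directive such as
    #
    #   .. figure:: map.png
    #      :width: 200px
    #
    #      The caption.
    #
    # the first paragraph of the indented block ("map.png", ":width: 200px")
    # becomes the argument/option block and the rest becomes the content, so
    # parsing yields arguments ['map.png'], options {'width': '200px'}, and
    # content ['The caption.'].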
2213
2214 def parse_directive_options(self, option_presets, option_spec, arg_block):
2215 options = option_presets.copy()
2216 for i, line in enumerate(arg_block):
2217 if re.match(Body.patterns['field_marker'], line):
2218 opt_block = arg_block[i:]
2219 arg_block = arg_block[:i]
2220 break
2221 else:
2222 opt_block = []
2223 if opt_block:
2224 success, data = self.parse_extension_options(option_spec,
2225 opt_block)
2226 if success: # data is a dict of options
2227 options.update(data)
2228 else: # data is an error string
2229 raise MarkupError(data)
2230 return options, arg_block
2231
2232 def parse_directive_arguments(self, directive, arg_block):
2233 required = directive.required_arguments
2234 optional = directive.optional_arguments
2235 arg_text = '\n'.join(arg_block)
2236 arguments = arg_text.split()
2237 if len(arguments) < required:
2238 raise MarkupError('%s argument(s) required, %s supplied'
2239 % (required, len(arguments)))
2240 elif len(arguments) > required + optional:
2241 if directive.final_argument_whitespace:
2242 arguments = arg_text.split(None, required + optional - 1)
2243 else:
2244 raise MarkupError(
2245 'maximum %s argument(s) allowed, %s supplied'
2246 % (required + optional, len(arguments)))
2247 return arguments
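    # Illustrative: with required=1, optional=0, and
    # final_argument_whitespace=True, the argument text "one two three" is
    # returned as ['one two three']; without final_argument_whitespace the
    # same text raises MarkupError (too many arguments).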
2248
2249 def parse_extension_options(self, option_spec, datalines):
2250 """
2251 Parse `datalines` for a field list containing extension options
2252 matching `option_spec`.
2253
2254 :Parameters:
2255 - `option_spec`: a mapping of option name to conversion
2256 function, which should raise an exception on bad input.
2257 - `datalines`: a list of input strings.
2258
2259 :Return:
2260 - Success value, 1 or 0.
2261 - An option dictionary on success, an error string on failure.
2262 """
2263 node = nodes.field_list()
2264 newline_offset, blank_finish = self.nested_list_parse(
2265 datalines, 0, node, initial_state='ExtensionOptions',
2266 blank_finish=True)
2267 if newline_offset != len(datalines): # incomplete parse of block
2268 return 0, 'invalid option block'
2269 try:
2270 options = utils.extract_extension_options(node, option_spec)
2271 except KeyError as detail:
2272 return 0, 'unknown option: "%s"' % detail.args[0]
2273 except (ValueError, TypeError) as detail:
2274 return 0, 'invalid option value: %s' % ' '.join(detail.args)
2275 except utils.ExtensionOptionError as detail:
2276 return 0, 'invalid option data: %s' % ' '.join(detail.args)
2277 if blank_finish:
2278 return 1, options
2279 else:
2280 return 0, 'option data incompletely parsed'
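    # Usage sketch (assuming the standard `directives.unchanged` converter):
    #
    #   success, data = self.parse_extension_options(
    #       {'class': directives.unchanged}, [':class: special'])
    #   # -> success == 1, data == {'class': 'special'}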
2281
2282 def unknown_directive(self, type_name):
2283 lineno = self.state_machine.abs_line_number()
2284 (indented, indent, offset, blank_finish
2285 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2286 text = '\n'.join(indented)
2287 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2288 nodes.literal_block(text, text),
2289 line=lineno)
2290 return [error], blank_finish
2291
2292 def comment(self, match):
2293 if self.state_machine.is_next_line_blank():
2294 first_comment_line = match.string[match.end():]
2295 if not first_comment_line.strip(): # empty comment
2296 return [nodes.comment()], True # "A tiny but practical wart."
2297 if first_comment_line.startswith('end of inclusion from "'):
2298 # cf. parsers.rst.directives.misc.Include
2299 self.document.include_log.pop()
2300 return [], True
2301 (indented, indent, offset, blank_finish
2302 ) = self.state_machine.get_first_known_indented(match.end())
2303 while indented and not indented[-1].strip():
2304 indented.trim_end()
2305 text = '\n'.join(indented)
2306 return [nodes.comment(text, text)], blank_finish
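    # Illustrative: ".." on a line by itself is an "empty comment" (it only
    # terminates a preceding construct), while ".. arbitrary text" that
    # matches none of the explicit constructs below is stored verbatim in a
    # comment node.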
2307
2308 explicit.constructs = [
2309 (footnote,
2310 re.compile(r"""
2311 \.\.[ ]+ # explicit markup start
2312 \[
2313 ( # footnote label:
2314 [0-9]+ # manually numbered footnote
2315 | # *OR*
2316 \# # anonymous auto-numbered footnote
2317 | # *OR*
2318                          \#%s            # labeled auto-numbered footnote
2319 | # *OR*
2320 \* # auto-symbol footnote
2321 )
2322 \]
2323 ([ ]+|$) # whitespace or end of line
2324 """ % Inliner.simplename, re.VERBOSE)),
2325 (citation,
2326 re.compile(r"""
2327 \.\.[ ]+ # explicit markup start
2328 \[(%s)\] # citation label
2329 ([ ]+|$) # whitespace or end of line
2330 """ % Inliner.simplename, re.VERBOSE)),
2331 (hyperlink_target,
2332 re.compile(r"""
2333 \.\.[ ]+ # explicit markup start
2334 _ # target indicator
2335 (?![ ]|$) # first char. not space or EOL
2336 """, re.VERBOSE)),
2337 (substitution_def,
2338 re.compile(r"""
2339 \.\.[ ]+ # explicit markup start
2340 \| # substitution indicator
2341 (?![ ]|$) # first char. not space or EOL
2342 """, re.VERBOSE)),
2343 (directive,
2344 re.compile(r"""
2345 \.\.[ ]+ # explicit markup start
2346 (%s) # directive name
2347 [ ]? # optional space
2348 :: # directive delimiter
2349 ([ ]+|$) # whitespace or end of line
2350 """ % Inliner.simplename, re.VERBOSE))]
2351
2352 def explicit_markup(self, match, context, next_state):
2353 """Footnotes, hyperlink targets, directives, comments."""
2354 nodelist, blank_finish = self.explicit_construct(match)
2355 self.parent += nodelist
2356 self.explicit_list(blank_finish)
2357 return [], next_state, []
2358
2359 def explicit_construct(self, match):
2360 """Determine which explicit construct this is, parse & return it."""
2361 errors = []
2362 for method, pattern in self.explicit.constructs:
2363 expmatch = pattern.match(match.string)
2364 if expmatch:
2365 try:
2366 return method(self, expmatch)
2367 except MarkupError as error:
2368 lineno = self.state_machine.abs_line_number()
2369 message = ' '.join(error.args)
2370 errors.append(self.reporter.warning(message, line=lineno))
2371 break
2372 nodelist, blank_finish = self.comment(match)
2373 return nodelist + errors, blank_finish
2374
2375 def explicit_list(self, blank_finish) -> None:
2376 """
2377 Create a nested state machine for a series of explicit markup
2378 constructs (including anonymous hyperlink targets).
2379 """
2380 offset = self.state_machine.line_offset + 1 # next line
2381 newline_offset, blank_finish = self.nested_list_parse(
2382 self.state_machine.input_lines[offset:],
2383 input_offset=self.state_machine.abs_line_offset() + 1,
2384 node=self.parent, initial_state='Explicit',
2385 blank_finish=blank_finish,
2386 match_titles=self.state_machine.match_titles)
2387 self.goto_line(newline_offset)
2388 if not blank_finish:
2389 self.parent += self.unindent_warning('Explicit markup')
2390
2391 def anonymous(self, match, context, next_state):
2392 """Anonymous hyperlink targets."""
2393 nodelist, blank_finish = self.anonymous_target(match)
2394 self.parent += nodelist
2395 self.explicit_list(blank_finish)
2396 return [], next_state, []
2397
2398 def anonymous_target(self, match):
2399 lineno = self.state_machine.abs_line_number()
2400 (block, indent, offset, blank_finish
2401 ) = self.state_machine.get_first_known_indented(match.end(),
2402 until_blank=True)
2403 blocktext = match.string[:match.end()] + '\n'.join(block)
2404 block = [escape2null(line) for line in block]
2405 target = self.make_target(block, blocktext, lineno, '')
2406 return [target], blank_finish
2407
2408 def line(self, match, context, next_state):
2409 """Section title overline or transition marker."""
2410 if self.state_machine.match_titles:
2411 return [match.string], 'Line', []
2412 elif match.string.strip() == '::':
2413 raise statemachine.TransitionCorrection('text')
2414 elif len(match.string.strip()) < 4:
2415 msg = self.reporter.info(
2416 'Unexpected possible title overline or transition.\n'
2417 "Treating it as ordinary text because it's so short.",
2418 line=self.state_machine.abs_line_number())
2419 self.parent += msg
2420 raise statemachine.TransitionCorrection('text')
2421 else:
2422 blocktext = self.state_machine.line
2423 msg = self.reporter.severe(
2424 'Unexpected section title or transition.',
2425 nodes.literal_block(blocktext, blocktext),
2426 line=self.state_machine.abs_line_number())
2427 self.parent += msg
2428 return [], next_state, []
2429
2430 def text(self, match, context, next_state):
2431 """Titles, definition lists, paragraphs."""
2432 return [match.string], 'Text', []
2433
2434
2435class RFC2822Body(Body):
2436
2437 """
2438 RFC2822 headers are only valid as the first constructs in documents. As
2439 soon as anything else appears, the `Body` state should take over.
2440 """
2441
2442 patterns = Body.patterns.copy() # can't modify the original
2443 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2444 initial_transitions = [(name, 'Body')
2445 for name in Body.initial_transitions]
2446 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2447
2448 def rfc2822(self, match, context, next_state):
2449 """RFC2822-style field list item."""
2450 fieldlist = nodes.field_list(classes=['rfc2822'])
2451 self.parent += fieldlist
2452 field, blank_finish = self.rfc2822_field(match)
2453 fieldlist += field
2454 offset = self.state_machine.line_offset + 1 # next line
2455 newline_offset, blank_finish = self.nested_list_parse(
2456 self.state_machine.input_lines[offset:],
2457 input_offset=self.state_machine.abs_line_offset() + 1,
2458 node=fieldlist, initial_state='RFC2822List',
2459 blank_finish=blank_finish)
2460 self.goto_line(newline_offset)
2461 if not blank_finish:
2462 self.parent += self.unindent_warning(
2463 'RFC2822-style field list')
2464 return [], next_state, []
2465
2466 def rfc2822_field(self, match):
2467 name = match.string[:match.string.find(':')]
2468 (indented, indent, line_offset, blank_finish
2469 ) = self.state_machine.get_first_known_indented(match.end(),
2470 until_blank=True)
2471 fieldnode = nodes.field()
2472 fieldnode += nodes.field_name(name, name)
2473 fieldbody = nodes.field_body('\n'.join(indented))
2474 fieldnode += fieldbody
2475 if indented:
2476 self.nested_parse(indented, input_offset=line_offset,
2477 node=fieldbody)
2478 return fieldnode, blank_finish
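    # Illustrative input (only recognized at the very start of a document):
    #
    #   Author: Jane Doe
    #   Date: 2002-03-22
    #
    # Each header becomes a field (field_name + field_body) in a field_list
    # with class "rfc2822".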
2479
2480
2481class SpecializedBody(Body):
2482
2483 """
2484 Superclass for second and subsequent compound element members. Compound
2485 elements are lists and list-like constructs.
2486
2487 All transition methods are disabled (redefined as `invalid_input`).
2488 Override individual methods in subclasses to re-enable.
2489
2490 For example, once an initial bullet list item, say, is recognized, the
2491 `BulletList` subclass takes over, with a "bullet_list" node as its
2492 container. Upon encountering the initial bullet list item, `Body.bullet`
2493 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2494 starts up a nested parsing session with `BulletList` as the initial state.
2495 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2496 as only bullet list items are encountered, they are parsed and inserted
2497 into the container. The first construct which is *not* a bullet list item
2498 triggers the `invalid_input` method, which ends the nested parse and
2499 closes the container. `BulletList` needs to recognize input that is
2500 invalid in the context of a bullet list, which means everything *other
2501 than* bullet list items, so it inherits the transition list created in
2502 `Body`.
2503 """
2504
2505 def invalid_input(self, match=None, context=None, next_state=None):
2506 """Not a compound element member. Abort this state machine."""
2507 self.state_machine.previous_line() # back up so parent SM can reassess
2508 raise EOFError
2509
2510 indent = invalid_input
2511 bullet = invalid_input
2512 enumerator = invalid_input
2513 field_marker = invalid_input
2514 option_marker = invalid_input
2515 doctest = invalid_input
2516 line_block = invalid_input
2517 grid_table_top = invalid_input
2518 simple_table_top = invalid_input
2519 explicit_markup = invalid_input
2520 anonymous = invalid_input
2521 line = invalid_input
2522 text = invalid_input
2523
2524
2525class BulletList(SpecializedBody):
2526
2527 """Second and subsequent bullet_list list_items."""
2528
2529 def bullet(self, match, context, next_state):
2530 """Bullet list item."""
2531 if match.string[0] != self.parent['bullet']:
2532 # different bullet: new list
2533 self.invalid_input()
2534 listitem, blank_finish = self.list_item(match.end())
2535 self.parent += listitem
2536 self.blank_finish = blank_finish
2537 return [], next_state, []
2538
2539
2540class DefinitionList(SpecializedBody):
2541
2542 """Second and subsequent definition_list_items."""
2543
2544 def text(self, match, context, next_state):
2545 """Definition lists."""
2546 return [match.string], 'Definition', []
2547
2548
2549class EnumeratedList(SpecializedBody):
2550
2551 """Second and subsequent enumerated_list list_items."""
2552
2553 def enumerator(self, match, context, next_state):
2554 """Enumerated list item."""
2555 format, sequence, text, ordinal = self.parse_enumerator(
2556 match, self.parent['enumtype'])
2557 if (format != self.format
2558 or (sequence != '#' and (sequence != self.parent['enumtype']
2559 or self.auto
2560 or ordinal != (self.lastordinal + 1)))
2561 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2562 # different enumeration: new list
2563 self.invalid_input()
2564 if sequence == '#':
2565 self.auto = 1
2566 listitem, blank_finish = self.list_item(match.end())
2567 self.parent += listitem
2568 self.blank_finish = blank_finish
2569 self.lastordinal = ordinal
2570 return [], next_state, []
2571
2572
2573class FieldList(SpecializedBody):
2574
2575 """Second and subsequent field_list fields."""
2576
2577 def field_marker(self, match, context, next_state):
2578 """Field list field."""
2579 field, blank_finish = self.field(match)
2580 self.parent += field
2581 self.blank_finish = blank_finish
2582 return [], next_state, []
2583
2584
2585class OptionList(SpecializedBody):
2586
2587 """Second and subsequent option_list option_list_items."""
2588
2589 def option_marker(self, match, context, next_state):
2590 """Option list item."""
2591 try:
2592 option_list_item, blank_finish = self.option_list_item(match)
2593 except MarkupError:
2594 self.invalid_input()
2595 self.parent += option_list_item
2596 self.blank_finish = blank_finish
2597 return [], next_state, []
2598
2599
2600class RFC2822List(SpecializedBody, RFC2822Body):
2601
2602 """Second and subsequent RFC2822-style field_list fields."""
2603
2604 patterns = RFC2822Body.patterns
2605 initial_transitions = RFC2822Body.initial_transitions
2606
2607 def rfc2822(self, match, context, next_state):
2608 """RFC2822-style field list item."""
2609 field, blank_finish = self.rfc2822_field(match)
2610 self.parent += field
2611 self.blank_finish = blank_finish
2612 return [], 'RFC2822List', []
2613
2614 blank = SpecializedBody.invalid_input
2615
2616
2617class ExtensionOptions(FieldList):
2618
2619 """
2620 Parse field_list fields for extension options.
2621
2622 No nested parsing is done (including inline markup parsing).
2623 """
2624
2625 def parse_field_body(self, indented, offset, node) -> None:
2626 """Override `Body.parse_field_body` for simpler parsing."""
2627 lines = []
2628 for line in list(indented) + ['']:
2629 if line.strip():
2630 lines.append(line)
2631 elif lines:
2632 text = '\n'.join(lines)
2633 node += nodes.paragraph(text, text)
2634 lines = []
2635
2636
2637class LineBlock(SpecializedBody):
2638
2639 """Second and subsequent lines of a line_block."""
2640
2641 blank = SpecializedBody.invalid_input
2642
2643 def line_block(self, match, context, next_state):
2644 """New line of line block."""
2645 lineno = self.state_machine.abs_line_number()
2646 line, messages, blank_finish = self.line_block_line(match, lineno)
2647 self.parent += line
2648 self.parent.parent += messages
2649 self.blank_finish = blank_finish
2650 return [], next_state, []
2651
2652
2653class Explicit(SpecializedBody):
2654
2655    """Second and subsequent explicit markup constructs."""
2656
2657 def explicit_markup(self, match, context, next_state):
2658 """Footnotes, hyperlink targets, directives, comments."""
2659 nodelist, blank_finish = self.explicit_construct(match)
2660 self.parent += nodelist
2661 self.blank_finish = blank_finish
2662 return [], next_state, []
2663
2664 def anonymous(self, match, context, next_state):
2665 """Anonymous hyperlink targets."""
2666 nodelist, blank_finish = self.anonymous_target(match)
2667 self.parent += nodelist
2668 self.blank_finish = blank_finish
2669 return [], next_state, []
2670
2671 blank = SpecializedBody.invalid_input
2672
2673
2674class SubstitutionDef(Body):
2675
2676 """
2677 Parser for the contents of a substitution_definition element.
2678 """
2679
2680 patterns = {
2681 'embedded_directive': re.compile(r'(%s)::( +|$)'
2682 % Inliner.simplename),
2683 'text': r''}
2684 initial_transitions = ['embedded_directive', 'text']
2685
2686 def embedded_directive(self, match, context, next_state):
2687 nodelist, blank_finish = self.directive(match,
2688 alt=self.parent['names'][0])
2689 self.parent += nodelist
2690 if not self.state_machine.at_eof():
2691 self.blank_finish = blank_finish
2692 raise EOFError
2693
2694 def text(self, match, context, next_state):
2695 if not self.state_machine.at_eof():
2696 self.blank_finish = self.state_machine.is_next_line_blank()
2697 raise EOFError
2698
2699
2700class Text(RSTState):
2701
2702 """
2703 Classifier of second line of a text block.
2704
2705 Could be a paragraph, a definition list item, or a title.
2706 """
2707
2708 patterns = {'underline': Body.patterns['line'],
2709 'text': r''}
2710 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2711
2712 def blank(self, match, context, next_state):
2713 """End of paragraph."""
2714 # NOTE: self.paragraph returns [node, system_message(s)], literalnext
2715 paragraph, literalnext = self.paragraph(
2716 context, self.state_machine.abs_line_number() - 1)
2717 self.parent += paragraph
2718 if literalnext:
2719 self.parent += self.literal_block()
2720 return [], 'Body', []
2721
2722 def eof(self, context):
2723 if context:
2724 self.blank(None, context, None)
2725 return []
2726
2727 def indent(self, match, context, next_state):
2728 """Definition list item."""
2729 dl = nodes.definition_list()
2730 # the definition list starts on the line before the indent:
2731 lineno = self.state_machine.abs_line_number() - 1
2732 dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
2733 dl_item, blank_finish = self.definition_list_item(context)
2734 dl += dl_item
2735 self.parent += dl
2736 offset = self.state_machine.line_offset + 1 # next line
2737 newline_offset, blank_finish = self.nested_list_parse(
2738 self.state_machine.input_lines[offset:],
2739 input_offset=self.state_machine.abs_line_offset() + 1,
2740 node=dl, initial_state='DefinitionList',
2741 blank_finish=blank_finish, blank_finish_state='Definition')
2742 self.goto_line(newline_offset)
2743 if not blank_finish:
2744 self.parent += self.unindent_warning('Definition list')
2745 return [], 'Body', []
2746
2747 def underline(self, match, context, next_state):
2748 """Section title."""
2749 lineno = self.state_machine.abs_line_number()
2750 title = context[0].rstrip()
2751 underline = match.string.rstrip()
2752 source = title + '\n' + underline
2753 messages = []
2754 if column_width(title) > len(underline):
2755 if len(underline) < 4:
2756 if self.state_machine.match_titles:
2757 msg = self.reporter.info(
2758 'Possible title underline, too short for the title.\n'
2759 "Treating it as ordinary text because it's so short.",
2760 line=lineno)
2761 self.parent += msg
2762 raise statemachine.TransitionCorrection('text')
2763 else:
2764 blocktext = context[0] + '\n' + self.state_machine.line
2765 msg = self.reporter.warning(
2766 'Title underline too short.',
2767 nodes.literal_block(blocktext, blocktext),
2768 line=lineno)
2769 messages.append(msg)
2770 if not self.state_machine.match_titles:
2771 blocktext = context[0] + '\n' + self.state_machine.line
2772 # We need get_source_and_line() here to report correctly
2773 src, srcline = self.state_machine.get_source_and_line()
2774 # TODO: why is abs_line_number() == srcline+1
2775 # if the error is in a table (try with test_tables.py)?
2776 # print("get_source_and_line", srcline)
2777 # print("abs_line_number", self.state_machine.abs_line_number())
2778 msg = self.reporter.severe(
2779 'Unexpected section title.',
2780 nodes.literal_block(blocktext, blocktext),
2781 source=src, line=srcline)
2782 self.parent += messages
2783 self.parent += msg
2784 return [], next_state, []
2785 style = underline[0]
2786 context[:] = []
2787 self.section(title, source, style, lineno - 1, messages)
2788 return [], next_state, []
2789
2790 def text(self, match, context, next_state):
2791 """Paragraph."""
2792 startline = self.state_machine.abs_line_number() - 1
2793 msg = None
2794 try:
2795 block = self.state_machine.get_text_block(flush_left=True)
2796 except statemachine.UnexpectedIndentationError as err:
2797 block, src, srcline = err.args
2798 msg = self.reporter.error('Unexpected indentation.',
2799 source=src, line=srcline)
2800 lines = context + list(block)
2801 paragraph, literalnext = self.paragraph(lines, startline)
2802 self.parent += paragraph
2803 self.parent += msg
2804 if literalnext:
2805 try:
2806 self.state_machine.next_line()
2807 except EOFError:
2808 pass
2809 self.parent += self.literal_block()
2810 return [], next_state, []
2811
2812 def literal_block(self):
2813 """Return a list of nodes."""
2814 (indented, indent, offset, blank_finish
2815 ) = self.state_machine.get_indented()
2816 while indented and not indented[-1].strip():
2817 indented.trim_end()
2818 if not indented:
2819 return self.quoted_literal_block()
2820 data = '\n'.join(indented)
2821 literal_block = nodes.literal_block(data, data)
2822 (literal_block.source,
2823 literal_block.line) = self.state_machine.get_source_and_line(offset+1)
2824 nodelist = [literal_block]
2825 if not blank_finish:
2826 nodelist.append(self.unindent_warning('Literal block'))
2827 return nodelist
2828
2829 def quoted_literal_block(self):
2830 abs_line_offset = self.state_machine.abs_line_offset()
2831 offset = self.state_machine.line_offset
2832 parent_node = nodes.Element()
2833 new_abs_offset = self.nested_parse(
2834 self.state_machine.input_lines[offset:],
2835 input_offset=abs_line_offset, node=parent_node, match_titles=False,
2836 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2837 'initial_state': 'QuotedLiteralBlock'})
2838 self.goto_line(new_abs_offset)
2839 return parent_node.children
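    # Illustrative quoted (unindented) literal block:
    #
    #   ::
    #
    #   > line one of the literal block
    #   > line two
    #
    # The quote character is fixed by the first line; an inconsistent quote
    # or unexpected indentation ends the block with an error message.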
2840
2841 def definition_list_item(self, termline):
2842 # the parser is already on the second (indented) line:
2843 dd_lineno = self.state_machine.abs_line_number()
2844 dt_lineno = dd_lineno - 1
2845 (indented, indent, line_offset, blank_finish
2846 ) = self.state_machine.get_indented()
2847 dl_item = nodes.definition_list_item(
2848 '\n'.join(termline + list(indented)))
2849 (dl_item.source,
2850 dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
2851 dt_nodes, messages = self.term(termline, dt_lineno)
2852 dl_item += dt_nodes
2853 dd = nodes.definition('', *messages)
2854 dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
2855 dl_item += dd
2856 if termline[0][-2:] == '::':
2857 dd += self.reporter.info(
2858 'Blank line missing before literal block (after the "::")? '
2859 'Interpreted as a definition list item.',
2860 line=dd_lineno)
2861 # TODO: drop a definition if it is an empty comment to allow
2862 # definition list items with several terms?
2863 # https://sourceforge.net/p/docutils/feature-requests/60/
2864 self.nested_parse(indented, input_offset=line_offset, node=dd)
2865 return dl_item, blank_finish
2866
2867 classifier_delimiter = re.compile(' +: +')
2868
2869 def term(self, lines, lineno):
2870 """Return a definition_list's term and optional classifiers."""
2871 assert len(lines) == 1
2872 text_nodes, messages = self.inline_text(lines[0], lineno)
2873 dt = nodes.term(lines[0])
2874 dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
2875 node_list = [dt]
2876 for i in range(len(text_nodes)):
2877 node = text_nodes[i]
2878 if isinstance(node, nodes.Text):
2879 parts = self.classifier_delimiter.split(node)
2880 if len(parts) == 1:
2881 node_list[-1] += node
2882 else:
2883 text = parts[0].rstrip()
2884 textnode = nodes.Text(text)
2885 node_list[-1] += textnode
2886 node_list += [nodes.classifier(unescape(part, True), part)
2887 for part in parts[1:]]
2888 else:
2889 node_list[-1] += node
2890 return node_list, messages
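    # Illustrative: the term line "gravity : physics : Newton" is split at
    # the " : " delimiters into the term "gravity" plus classifier elements
    # "physics" and "Newton"; a colon without surrounding spaces is left
    # alone.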
2891
2892
2893class SpecializedText(Text):
2894
2895 """
2896 Superclass for second and subsequent lines of Text-variants.
2897
2898 All transition methods are disabled. Override individual methods in
2899 subclasses to re-enable.
2900 """
2901
2902 def eof(self, context):
2903 """Incomplete construct."""
2904 return []
2905
2906 def invalid_input(self, match=None, context=None, next_state=None):
2907 """Not a compound element member. Abort this state machine."""
2908 raise EOFError
2909
2910 blank = invalid_input
2911 indent = invalid_input
2912 underline = invalid_input
2913 text = invalid_input
2914
2915
2916class Definition(SpecializedText):
2917
2918 """Second line of potential definition_list_item."""
2919
2920 def eof(self, context):
2921 """Not a definition."""
2922 self.state_machine.previous_line(2) # so parent SM can reassess
2923 return []
2924
2925 def indent(self, match, context, next_state):
2926 """Definition list item."""
2927 dl_item, blank_finish = self.definition_list_item(context)
2928 self.parent += dl_item
2929 self.blank_finish = blank_finish
2930 return [], 'DefinitionList', []
2931
2932
2933class Line(SpecializedText):
2934
2935 """
2936 Second line of over- & underlined section title or transition marker.
2937 """
2938
2939 eofcheck = 1 # ignored, will be removed in Docutils 2.0.
2940
2941 def eof(self, context):
2942 """Transition marker at end of section or document."""
2943 marker = context[0].strip()
2944 if len(marker) < 4:
2945 self.state_correction(context)
2946 src, srcline = self.state_machine.get_source_and_line()
2947 # lineno = self.state_machine.abs_line_number() - 1
2948 transition = nodes.transition(rawsource=context[0])
2949 transition.source = src
2950 transition.line = srcline - 1
2951 # transition.line = lineno
2952 self.parent += transition
2953 return []
2954
2955 def blank(self, match, context, next_state):
2956 """Transition marker."""
2957 src, srcline = self.state_machine.get_source_and_line()
2958 marker = context[0].strip()
2959 if len(marker) < 4:
2960 self.state_correction(context)
2961 transition = nodes.transition(rawsource=marker)
2962 transition.source = src
2963 transition.line = srcline - 1
2964 self.parent += transition
2965 return [], 'Body', []
2966
2967 def text(self, match, context, next_state):
2968 """Potential over- & underlined title."""
2969 lineno = self.state_machine.abs_line_number() - 1
2970 overline = context[0]
2971 title = match.string
2972 underline = ''
2973 try:
2974 underline = self.state_machine.next_line()
2975 except EOFError:
2976 blocktext = overline + '\n' + title
2977 if len(overline.rstrip()) < 4:
2978 self.short_overline(context, blocktext, lineno, 2)
2979 else:
2980 msg = self.reporter.severe(
2981 'Incomplete section title.',
2982 nodes.literal_block(blocktext, blocktext),
2983 line=lineno)
2984 self.parent += msg
2985 return [], 'Body', []
2986 source = '%s\n%s\n%s' % (overline, title, underline)
2987 overline = overline.rstrip()
2988 underline = underline.rstrip()
2989 if not self.transitions['underline'][0].match(underline):
2990 blocktext = overline + '\n' + title + '\n' + underline
2991 if len(overline.rstrip()) < 4:
2992 self.short_overline(context, blocktext, lineno, 2)
2993 else:
2994 msg = self.reporter.severe(
2995 'Missing matching underline for section title overline.',
2996 nodes.literal_block(source, source),
2997 line=lineno)
2998 self.parent += msg
2999 return [], 'Body', []
3000 elif overline != underline:
3001 blocktext = overline + '\n' + title + '\n' + underline
3002 if len(overline.rstrip()) < 4:
3003 self.short_overline(context, blocktext, lineno, 2)
3004 else:
3005 msg = self.reporter.severe(
3006 'Title overline & underline mismatch.',
3007 nodes.literal_block(source, source),
3008 line=lineno)
3009 self.parent += msg
3010 return [], 'Body', []
3011 title = title.rstrip()
3012 messages = []
3013 if column_width(title) > len(overline):
3014 blocktext = overline + '\n' + title + '\n' + underline
3015 if len(overline.rstrip()) < 4:
3016 self.short_overline(context, blocktext, lineno, 2)
3017 else:
3018 msg = self.reporter.warning(
3019 'Title overline too short.',
3020 nodes.literal_block(source, source),
3021 line=lineno)
3022 messages.append(msg)
3023 style = (overline[0], underline[0])
3024 self.section(title.lstrip(), source, style, lineno + 1, messages)
3025 return [], 'Body', []
3026
3027 indent = text # indented title
3028
3029 def underline(self, match, context, next_state):
3030 overline = context[0]
3031 blocktext = overline + '\n' + self.state_machine.line
3032 lineno = self.state_machine.abs_line_number() - 1
3033 if len(overline.rstrip()) < 4:
3034 self.short_overline(context, blocktext, lineno, 1)
3035 msg = self.reporter.error(
3036 'Invalid section title or transition marker.',
3037 nodes.literal_block(blocktext, blocktext),
3038 line=lineno)
3039 self.parent += msg
3040 return [], 'Body', []
3041
3042 def short_overline(self, context, blocktext, lineno, lines=1) -> None:
3043 msg = self.reporter.info(
3044 'Possible incomplete section title.\nTreating the overline as '
3045 "ordinary text because it's so short.",
3046 line=lineno)
3047 self.parent += msg
3048 self.state_correction(context, lines)
3049
3050 def state_correction(self, context, lines=1):
3051 self.state_machine.previous_line(lines)
3052 context[:] = []
3053 raise statemachine.StateCorrection('Body', 'text')
3054
3055
3056class QuotedLiteralBlock(RSTState):
3057
3058 """
3059 Nested parse handler for quoted (unindented) literal blocks.
3060
3061 Special-purpose. Not for inclusion in `state_classes`.
3062 """
3063
3064 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
3065 'text': r''}
3066 initial_transitions = ('initial_quoted', 'text')
3067
3068 def __init__(self, state_machine, debug=False) -> None:
3069 RSTState.__init__(self, state_machine, debug)
3070 self.messages = []
3071 self.initial_lineno = None
3072
3073 def blank(self, match, context, next_state):
3074 if context:
3075 raise EOFError
3076 else:
3077 return context, next_state, []
3078
3079 def eof(self, context):
3080 if context:
3081 src, srcline = self.state_machine.get_source_and_line(
3082 self.initial_lineno)
3083 text = '\n'.join(context)
3084 literal_block = nodes.literal_block(text, text)
3085 literal_block.source = src
3086 literal_block.line = srcline
3087 self.parent += literal_block
3088 else:
3089 self.parent += self.reporter.warning(
3090 'Literal block expected; none found.',
3091 line=self.state_machine.abs_line_number()
3092 ) # src not available, statemachine.input_lines is empty
3093 self.state_machine.previous_line()
3094 self.parent += self.messages
3095 return []
3096
3097 def indent(self, match, context, next_state):
3098 assert context, ('QuotedLiteralBlock.indent: context should not '
3099 'be empty!')
3100 self.messages.append(
3101 self.reporter.error('Unexpected indentation.',
3102 line=self.state_machine.abs_line_number()))
3103 self.state_machine.previous_line()
3104 raise EOFError
3105
3106 def initial_quoted(self, match, context, next_state):
3107 """Match arbitrary quote character on the first line only."""
3108 self.remove_transition('initial_quoted')
3109 quote = match.string[0]
3110 pattern = re.compile(re.escape(quote))
3111 # New transition matches consistent quotes only:
3112 self.add_transition('quoted',
3113 (pattern, self.quoted, self.__class__.__name__))
3114 self.initial_lineno = self.state_machine.abs_line_number()
3115 return [match.string], next_state, []
3116
3117 def quoted(self, match, context, next_state):
3118 """Match consistent quotes on subsequent lines."""
3119 context.append(match.string)
3120 return context, next_state, []
3121
3122 def text(self, match, context, next_state):
3123 if context:
3124 self.messages.append(
3125 self.reporter.error('Inconsistent literal block quoting.',
3126 line=self.state_machine.abs_line_number()))
3127 self.state_machine.previous_line()
3128 raise EOFError
3129
3130
3131state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
3132 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
3133 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
3134"""Standard set of State classes used to start `RSTStateMachine`."""