1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
6This is the ``docutils.parsers.rst.states`` module, the core of
7the reStructuredText parser. It defines the following:
8
9:Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
30
31:Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
35
36:Functions:
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
39
40:Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
42
43Parser Overview
44===============
45
46The reStructuredText parser is implemented as a recursive state machine,
47examining its input one line at a time. To understand how the parser works,
48please first become familiar with the `docutils.statemachine` module. In the
49description below, references are made to classes defined in this module;
50please see the individual classes for details.
51
52Parsing proceeds as follows:
53
541. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
58
592. The method associated with the matched transition pattern is called.
60
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
65
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
70
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
73
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
83
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
86
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
90
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
93
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
97
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
101"""
102
103__docformat__ = 'reStructuredText'
104
105
106import re
107from types import FunctionType, MethodType
108
109from docutils import nodes, statemachine, utils
110from docutils import ApplicationError, DataError
111from docutils.statemachine import StateMachineWS, StateWS
112from docutils.nodes import fully_normalize_name as normalize_name
113from docutils.nodes import unescape, whitespace_normalize_name
114import docutils.parsers.rst
115from docutils.parsers.rst import directives, languages, tableparser, roles
116from docutils.utils import escape2null, column_width
117from docutils.utils import punctuation_chars, urischemes
118from docutils.utils import split_escaped_whitespace
119from docutils.utils._roman_numerals import (
120 InvalidRomanNumeralError,
121 RomanNumeral,
122)
123
124
# Exception classes.
class MarkupError(DataError):
    pass


class UnknownInterpretedRoleError(DataError):
    pass


class InterpretedRoleNotImplementedError(DataError):
    pass


class ParserError(ApplicationError):
    pass


class MarkupMismatch(Exception):
    pass
130
131
class Struct:

    """Simple namespace: exposes its keyword arguments as attributes."""

    def __init__(self, **attributes) -> None:
        for name, value in attributes.items():
            setattr(self, name, value)
138
139
class RSTStateMachine(StateMachineWS):

    """
    reStructuredText's master StateMachine.

    The entry point to reStructuredText parsing is the `run()` method.
    """

    def run(self, input_lines, document, input_offset=0, match_titles=True,
            inliner=None) -> None:
        """
        Parse `input_lines` and modify the `document` node in place.

        Extend `StateMachineWS.run()`: set up parse-global data and
        run the StateMachine.
        """
        self.language = languages.get_language(
            document.settings.language_code, document.reporter)
        self.match_titles = match_titles
        if inliner is None:
            inliner = Inliner()
        inliner.init_customizations(document.settings)
        # Parse-global data, shared with nested state machines:
        memo = Struct(document=document,
                      reporter=document.reporter,
                      language=self.language,
                      title_styles=[],
                      section_level=0,
                      section_bubble_up_kludge=False,
                      inliner=inliner)
        self.memo = memo
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = memo.reporter
        self.node = document
        outcome = StateMachineWS.run(self, input_lines, input_offset,
                                     input_source=document['source'])
        assert outcome == [], 'RSTStateMachine.run() results should be empty!'
        self.node = self.memo = None  # remove unneeded references
177
178
class NestedStateMachine(StateMachineWS):

    """
    StateMachine run from within other StateMachine runs, to parse nested
    document structures.
    """

    def run(self, input_lines, input_offset, memo, node, match_titles=True):
        """
        Parse `input_lines` and populate a `docutils.nodes.document` instance.

        Extend `StateMachineWS.run()`: set up document-wide data.
        """
        self.memo = memo
        self.match_titles = match_titles
        self.node = node
        # Copy frequently-used objects out of the shared memo:
        self.document = memo.document
        self.reporter = memo.reporter
        self.language = memo.language
        self.attach_observer(self.document.note_source)
        parse_results = StateMachineWS.run(self, input_lines, input_offset)
        assert parse_results == [], ('NestedStateMachine.run() results '
                                     'should be empty!')
        return parse_results
203
204
class RSTState(StateWS):

    """
    reStructuredText State superclass.

    Contains methods used by all State subclasses.
    """

    nested_sm = NestedStateMachine
    # Cache of idle nested state machines (default class & kwargs only),
    # shared by all RSTState instances to avoid repeated construction.
    nested_sm_cache = []

    def __init__(self, state_machine, debug=False) -> None:
        self.nested_sm_kwargs = {'state_classes': state_classes,
                                 'initial_state': 'Body'}
        StateWS.__init__(self, state_machine, debug)

    def runtime_init(self) -> None:
        """Copy frequently used objects from the parse-global memo."""
        StateWS.runtime_init(self)
        memo = self.state_machine.memo
        self.memo = memo
        self.reporter = memo.reporter
        self.inliner = memo.inliner
        self.document = memo.document
        self.parent = self.state_machine.node
        # enable the reporter to determine source and source-line
        if not hasattr(self.reporter, 'get_source_and_line'):
            self.reporter.get_source_and_line = self.state_machine.get_source_and_line  # noqa:E501

    def goto_line(self, abs_line_offset) -> None:
        """
        Jump to input line `abs_line_offset`, ignoring jumps past the end.
        """
        try:
            self.state_machine.goto_line(abs_line_offset)
        except EOFError:
            pass

    def no_match(self, context, transitions):
        """
        Override `StateWS.no_match` to generate a system message.

        This code should never be run.
        """
        self.reporter.severe(
            'Internal error: no transition pattern match. State: "%s"; '
            'transitions: %s; context: %s; current line: %r.'
            % (self.__class__.__name__, transitions, context,
               self.state_machine.line))
        return context, None, []

    def bof(self, context):
        """Called at beginning of file."""
        return [], []

    def nested_parse(self, block, input_offset, node, match_titles=False,
                     state_machine_class=None, state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`.

        Return the absolute line offset reached by the nested parse.
        """
        # A cached state machine may only be reused when both the state
        # machine class and its keyword arguments are the defaults:
        use_default = (state_machine_class is None
                       and state_machine_kwargs is None)
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
        block_length = len(block)

        state_machine = None
        if use_default:
            try:
                state_machine = self.nested_sm_cache.pop()
            except IndexError:
                pass
        if not state_machine:
            state_machine = state_machine_class(debug=self.debug,
                                                **state_machine_kwargs)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        if use_default:
            self.nested_sm_cache.append(state_machine)
        else:
            state_machine.unlink()
        new_offset = state_machine.abs_line_offset()
        # No `block.parent` implies disconnected -- lines aren't in sync:
        if block.parent and (len(block) - block_length) != 0:
            # Adjustment for block if modified in nested parse:
            self.state_machine.next_line(len(block) - block_length)
        return new_offset

    def nested_list_parse(self, block, input_offset, node, initial_state,
                          blank_finish,
                          blank_finish_state=None,
                          extra_settings=None,
                          match_titles=False,
                          state_machine_class=None,
                          state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`. Also keep track of optional intermediate blank lines and the
        required final one.

        `extra_settings` is an optional mapping of attributes to set on the
        initial state.  Return (new absolute line offset, blank_finish flag).
        """
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs.copy()
        # NOTE(review): a caller-supplied `state_machine_kwargs` dict is
        # mutated here (behavior kept for compatibility).
        state_machine_kwargs['initial_state'] = initial_state
        state_machine = state_machine_class(debug=self.debug,
                                            **state_machine_kwargs)
        if blank_finish_state is None:
            blank_finish_state = initial_state
        state_machine.states[blank_finish_state].blank_finish = blank_finish
        # `None` default replaces the former mutable default argument `{}`:
        if extra_settings:
            for key, value in extra_settings.items():
                setattr(state_machine.states[initial_state], key, value)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        blank_finish = state_machine.states[blank_finish_state].blank_finish
        state_machine.unlink()
        return state_machine.abs_line_offset(), blank_finish

    def section(self, title, source, style, lineno, messages) -> None:
        """Check for a valid subsection and create one if it checks out."""
        if self.check_subsection(source, style, lineno):
            self.new_subsection(title, lineno, messages)

    def check_subsection(self, source, style, lineno) -> bool:
        """
        Check for a valid subsection header. Return True or False.

        When a new section is reached that isn't a subsection of the current
        section, back up the line count (use ``previous_line(-x)``), then
        ``raise EOFError``. The current StateMachine will finish, then the
        calling StateMachine can re-examine the title. This will work its way
        back up the calling chain until the correct section level is reached.

        @@@ Alternative: Evaluate the title, store the title info & level, and
        back up the chain until that level is reached. Store in memo? Or
        return in results?

        :Exception: `EOFError` when a sibling or supersection encountered.
        """
        memo = self.memo
        title_styles = memo.title_styles
        mylevel = memo.section_level
        try:  # check for existing title style
            level = title_styles.index(style) + 1
        except ValueError:  # new title style
            if len(title_styles) == memo.section_level:  # new subsection
                title_styles.append(style)
                return True
            else:  # not at lowest level
                self.parent += self.title_inconsistent(source, lineno)
                return False
        if level <= mylevel:  # sibling or supersection
            memo.section_level = level  # bubble up to parent section
            if len(style) == 2:
                memo.section_bubble_up_kludge = True
            # back up 2 lines for underline title, 3 for overline title
            self.state_machine.previous_line(len(style) + 1)
            raise EOFError  # let parent section re-evaluate
        if level == mylevel + 1:  # immediate subsection
            return True
        else:  # invalid subsection
            self.parent += self.title_inconsistent(source, lineno)
            return False

    def title_inconsistent(self, sourcetext, lineno):
        """Return a severe system message about an inconsistent title."""
        error = self.reporter.severe(
            'Title level inconsistent:', nodes.literal_block('', sourcetext),
            line=lineno)
        return error

    def new_subsection(self, title, lineno, messages):
        """Append new subsection to document tree. On return, check level."""
        memo = self.memo
        mylevel = memo.section_level
        memo.section_level += 1
        section_node = nodes.section()
        self.parent += section_node
        textnodes, title_messages = self.inline_text(title, lineno)
        titlenode = nodes.title(title, '', *textnodes)
        name = normalize_name(titlenode.astext())
        section_node['names'].append(name)
        section_node += titlenode
        section_node += messages
        section_node += title_messages
        self.document.note_implicit_target(section_node, section_node)
        offset = self.state_machine.line_offset + 1
        absoffset = self.state_machine.abs_line_offset() + 1
        newabsoffset = self.nested_parse(
            self.state_machine.input_lines[offset:], input_offset=absoffset,
            node=section_node, match_titles=True)
        self.goto_line(newabsoffset)
        if memo.section_level <= mylevel:  # can't handle next section?
            raise EOFError  # bubble up to supersection
        # reset section_level; next pass will detect it properly
        memo.section_level = mylevel

    def paragraph(self, lines, lineno):
        """
        Return a list (paragraph & messages) & a boolean: literal_block next?
        """
        data = '\n'.join(lines).rstrip()
        # An unescaped '::' at the end announces a following literal block:
        if re.search(r'(?<!\\)(\\\\)*::$', data):
            if len(data) == 2:  # '::' alone: no paragraph at all
                return [], 1
            elif data[-3] in ' \n':  # whitespace before '::': drop it
                text = data[:-3].rstrip()
            else:  # 'text::': keep a single colon
                text = data[:-1]
            literalnext = 1
        else:
            text = data
            literalnext = 0
        textnodes, messages = self.inline_text(text, lineno)
        p = nodes.paragraph(data, '', *textnodes)
        p.source, p.line = self.state_machine.get_source_and_line(lineno)
        return [p] + messages, literalnext

    def inline_text(self, text, lineno):
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.
        """
        nodes, messages = self.inliner.parse(text, lineno,
                                             self.memo, self.parent)
        return nodes, messages

    def unindent_warning(self, node_name):
        """Return a system message warning about an unexpected unindent."""
        # the actual problem is one line below the current line
        lineno = self.state_machine.abs_line_number() + 1
        return self.reporter.warning('%s ends without a blank line; '
                                     'unexpected unindent.' % node_name,
                                     line=lineno)
439
440
def build_regexp(definition, compile_patterns=True):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.

    Return a compiled pattern if `compile_patterns` is true, else the
    pattern string.
    """
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        if isinstance(part, tuple):
            # Nested definition: expand to its (uncompiled) pattern string.
            part_strings.append(build_regexp(part, compile_patterns=False))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    regexp = f'{prefix}(?P<{name}>{or_group}){suffix}'
    if compile_patterns:
        return re.compile(regexp)
    return regexp
462
463
464class Inliner:
465
466 """
467 Parse inline markup; call the `parse()` method.
468 """
469
    def __init__(self) -> None:
        """Initialize the (initially empty) implicit-markup dispatch list."""
        self.implicit_dispatch = []
        """List of (pattern, bound method) tuples, used by
        `self.implicit_inline`."""
474
    def init_customizations(self, settings) -> None:
        """Compile the inline-markup recognition patterns from `settings`."""
        # lookahead and look-behind expressions for inline markup rules
        if getattr(settings, 'character_level_inline_markup', False):
            start_string_prefix = '(^|(?<!\x00))'
            end_string_suffix = ''
        else:
            start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
                                   (punctuation_chars.openers,
                                    punctuation_chars.delimiters))
            end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
                                 (punctuation_chars.closing_delimiters,
                                  punctuation_chars.delimiters,
                                  punctuation_chars.closers))
        # values for %-interpolation into the verbose regexps below
        # (locals plus the pattern-fragment class attributes):
        args = locals().copy()
        args.update(vars(self.__class__))

        parts = ('initial_inline', start_string_prefix, '',
                 [
                  ('start', '', self.non_whitespace_after,  # simple start-strings
                   [r'\*\*',  # strong
                    r'\*(?!\*)',  # emphasis but not strong
                    r'``',  # literal
                    r'_`',  # inline internal target
                    r'\|(?!\|)']  # substitution reference
                   ),
                  ('whole', '', end_string_suffix,  # whole constructs
                   [  # reference name & end-string
                    r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
                    ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
                     [r'[0-9]+',  # manually numbered
                      r'\#(%s)?' % self.simplename,  # auto-numbered (w/ label?)
                      r'\*',  # auto-symbol
                      r'(?P<citationlabel>%s)' % self.simplename,  # citation ref
                      ]
                     )
                    ]
                   ),
                  ('backquote',  # interpreted text or phrase reference
                   '(?P<role>(:%s:)?)' % self.simplename,  # optional role
                   self.non_whitespace_after,
                   ['`(?!`)']  # but not literal
                   )
                  ]
                 )
        self.start_string_prefix = start_string_prefix
        self.end_string_suffix = end_string_suffix
        self.parts = parts

        self.patterns = Struct(
            initial=build_regexp(parts),
            emphasis=re.compile(self.non_whitespace_escape_before
                                + r'(\*)' + end_string_suffix),
            strong=re.compile(self.non_whitespace_escape_before
                              + r'(\*\*)' + end_string_suffix),
            interpreted_or_phrase_ref=re.compile(
                r"""
                %(non_unescaped_whitespace_escape_before)s
                (
                  `
                  (?P<suffix>
                    (?P<role>:%(simplename)s:)?
                    (?P<refend>__?)?
                  )
                )
                %(end_string_suffix)s
                """ % args, re.VERBOSE),
            embedded_link=re.compile(
                r"""
                (
                  (?:[ \n]+|^)        # spaces or beginning of line/string
                  <                   # open bracket
                  %(non_whitespace_after)s
                  (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
                  %(non_whitespace_escape_before)s
                  >                   # close bracket
                )
                $                     # end of string
                """ % args, re.VERBOSE),
            literal=re.compile(self.non_whitespace_before + '(``)'
                               + end_string_suffix),
            target=re.compile(self.non_whitespace_escape_before
                              + r'(`)' + end_string_suffix),
            substitution_ref=re.compile(self.non_whitespace_escape_before
                                        + r'(\|_{0,2})'
                                        + end_string_suffix),
            email=re.compile(self.email_pattern % args + '$',
                             re.VERBOSE),
            uri=re.compile(
                (r"""
                %(start_string_prefix)s
                (?P<whole>
                  (?P<absolute>           # absolute URI
                    (?P<scheme>             # scheme (http, ftp, mailto)
                      [a-zA-Z][a-zA-Z0-9.+-]*
                    )
                    :
                    (
                      (                       # either:
                        (//?)?                  # hierarchical URI
                        %(uric)s*               # URI characters
                        %(uri_end)s             # final URI char
                      )
                      (                       # optional query
                        \?%(uric)s*
                        %(uri_end)s
                      )?
                      (                       # optional fragment
                        \#%(uric)s*
                        %(uri_end)s
                      )?
                    )
                  )
                |                       # *OR*
                  (?P<email>              # email address
                   """ + self.email_pattern + r"""
                  )
                )
                %(end_string_suffix)s
                """) % args, re.VERBOSE),
            pep=re.compile(
                r"""
                %(start_string_prefix)s
                (
                  (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
                |
                  (PEP\s+(?P<pepnum2>\d+))      # reference by name
                )
                %(end_string_suffix)s""" % args, re.VERBOSE),
            rfc=re.compile(
                r"""
                %(start_string_prefix)s
                (RFC(-|\s+)?(?P<rfcnum>\d+))
                %(end_string_suffix)s""" % args, re.VERBOSE))

        # Standalone URIs are always recognized; PEP and RFC references
        # only when the respective settings are enabled:
        self.implicit_dispatch.append((self.patterns.uri,
                                       self.standalone_uri))
        if settings.pep_references:
            self.implicit_dispatch.append((self.patterns.pep,
                                           self.pep_reference))
        if settings.rfc_references:
            self.implicit_dispatch.append((self.patterns.rfc,
                                           self.rfc_reference))
617
    def parse(self, text, lineno, memo, parent):
        # Needs to be refactored for nested inline markup.
        # Add nested_parse() method?
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.

        Using `self.patterns.initial`, a pattern which matches start-strings
        (emphasis, strong, interpreted, phrase reference, literal,
        substitution reference, and inline target) and complete constructs
        (simple reference, footnote reference), search for a candidate. When
        one is found, check for validity (e.g., not a quoted '*' character).
        If valid, search for the corresponding end string if applicable, and
        check it for validity. If not found or invalid, generate a warning
        and ignore the start-string. Implicit inline markup (e.g. standalone
        URIs) is found last.

        :text: source string
        :lineno: absolute line number, cf. `statemachine.get_source_and_line()`
        """
        self.reporter = memo.reporter
        self.document = memo.document
        self.language = memo.language
        self.parent = parent
        pattern_search = self.patterns.initial.search
        dispatch = self.dispatch
        # Backslash-escaped characters are temporarily converted to nulls:
        remaining = escape2null(text)
        processed = []
        unprocessed = []
        messages = []
        while remaining:
            match = pattern_search(remaining)
            if match:
                groups = match.groupdict()
                # the first non-empty group selects the parsing method
                method = dispatch[groups['start'] or groups['backquote']
                                  or groups['refend'] or groups['fnend']]
                before, inlines, remaining, sysmessages = method(self, match,
                                                                 lineno)
                unprocessed.append(before)
                messages += sysmessages
                if inlines:
                    # text preceding the construct may hold implicit markup
                    processed += self.implicit_inline(''.join(unprocessed),
                                                      lineno)
                    processed += inlines
                    unprocessed = []
            else:
                break
        remaining = ''.join(unprocessed) + remaining
        if remaining:
            processed += self.implicit_inline(remaining, lineno)
        return processed, messages
668
    # Inline object recognition
    # -------------------------
    # See also init_customizations().
    # The class attributes below are pattern fragments, %-interpolated
    # into the compiled regexps in `init_customizations()`.
    non_whitespace_before = r'(?<!\s)'
    non_whitespace_escape_before = r'(?<![\s\x00])'
    non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
    non_whitespace_after = r'(?!\s)'
    # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
    simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
    # Valid URI characters (see RFC 2396 & RFC 2732);
    # final \x00 allows backslash escapes in URIs:
    uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
    # Delimiter indicating the end of a URI (not part of the URI):
    uri_end_delim = r"""[>]"""
    # Last URI character; same as uric but no punctuation:
    urilast = r"""[_~*/=+a-zA-Z0-9]"""
    # End of a URI (either 'urilast' or 'uric followed by a
    # uri_end_delim'):
    uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
    # E-mail address characters (the \x00 again allows backslash escapes):
    emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
    email_pattern = r"""
          %(emailc)s+(?:\.%(emailc)s+)*   # name
          (?<!\x00)@                      # at
          %(emailc)s+(?:\.%(emailc)s*)*   # host
          %(uri_end)s                     # final URI char
          """
695
696 def quoted_start(self, match):
697 """Test if inline markup start-string is 'quoted'.
698
699 'Quoted' in this context means the start-string is enclosed in a pair
700 of matching opening/closing delimiters (not necessarily quotes)
701 or at the end of the match.
702 """
703 string = match.string
704 start = match.start()
705 if start == 0: # start-string at beginning of text
706 return False
707 prestart = string[start - 1]
708 try:
709 poststart = string[match.end()]
710 except IndexError: # start-string at end of text
711 return True # not "quoted" but no markup start-string either
712 return punctuation_chars.match_chars(prestart, poststart)
713
    def inline_obj(self, match, lineno, end_pattern, nodeclass,
                   restore_backslashes=False):
        """
        Parse a start-string/end-string construct matched by `match`.

        Return a 5-tuple: leading text, a list of nodes (one `nodeclass`
        instance, or one `problematic` node after a failure), remaining
        text, a list of system messages, and the matched end-string
        ('' if no complete construct was recognized).
        """
        string = match.string
        matchstart = match.start('start')
        matchend = match.end('start')
        if self.quoted_start(match):
            # not markup: leave the start-string in the output text
            return string[:matchend], [], string[matchend:], [], ''
        endmatch = end_pattern.search(string[matchend:])
        if endmatch and endmatch.start(1):  # 1 or more chars
            text = endmatch.string[:endmatch.start(1)]
            if restore_backslashes:
                text = unescape(text, True)
            textend = matchend + endmatch.end(1)
            rawsource = unescape(string[matchstart:textend], True)
            node = nodeclass(rawsource, text)
            return (string[:matchstart], [node],
                    string[textend:], [], endmatch.group(1))
        # no end-string found: emit a warning and a `problematic` node
        msg = self.reporter.warning(
            'Inline %s start-string without end-string.'
            % nodeclass.__name__, line=lineno)
        text = unescape(string[matchstart:matchend], True)
        prb = self.problematic(text, text, msg)
        return string[:matchstart], [prb], string[matchend:], [msg], ''
737
738 def problematic(self, text, rawsource, message):
739 msgid = self.document.set_id(message, self.parent)
740 problematic = nodes.problematic(rawsource, text, refid=msgid)
741 prbid = self.document.set_id(problematic)
742 message.add_backref(prbid)
743 return problematic
744
745 def emphasis(self, match, lineno):
746 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
747 match, lineno, self.patterns.emphasis, nodes.emphasis)
748 return before, inlines, remaining, sysmessages
749
750 def strong(self, match, lineno):
751 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
752 match, lineno, self.patterns.strong, nodes.strong)
753 return before, inlines, remaining, sysmessages
754
    def interpreted_or_phrase_ref(self, match, lineno):
        """
        Parse backquoted text: interpreted text or a phrase reference.

        Dispatch to `self.interpreted()` or `self.phrase_ref()` once the
        end-string is found; return (before, nodes, after, messages).
        """
        end_pattern = self.patterns.interpreted_or_phrase_ref
        string = match.string
        matchstart = match.start('backquote')
        matchend = match.end('backquote')
        rolestart = match.start('role')
        role = match.group('role')
        position = ''
        if role:
            role = role[1:-1]  # strip the enclosing colons
            position = 'prefix'
        elif self.quoted_start(match):
            return string[:matchend], [], string[matchend:], []
        endmatch = end_pattern.search(string[matchend:])
        if endmatch and endmatch.start(1):  # 1 or more chars
            textend = matchend + endmatch.end()
            if endmatch.group('role'):
                if role:
                    msg = self.reporter.warning(
                        'Multiple roles in interpreted text (both '
                        'prefix and suffix present; only one allowed).',
                        line=lineno)
                    text = unescape(string[rolestart:textend], True)
                    prb = self.problematic(text, text, msg)
                    return string[:rolestart], [prb], string[textend:], [msg]
                role = endmatch.group('suffix')[1:-1]
                position = 'suffix'
            escaped = endmatch.string[:endmatch.start(1)]
            rawsource = unescape(string[matchstart:textend], True)
            if rawsource[-1:] == '_':  # trailing '_': phrase reference
                if role:
                    msg = self.reporter.warning(
                        'Mismatch: both interpreted text role %s and '
                        'reference suffix.' % position, line=lineno)
                    text = unescape(string[rolestart:textend], True)
                    prb = self.problematic(text, text, msg)
                    return string[:rolestart], [prb], string[textend:], [msg]
                return self.phrase_ref(string[:matchstart], string[textend:],
                                       rawsource, escaped)
            else:
                rawsource = unescape(string[rolestart:textend], True)
                nodelist, messages = self.interpreted(rawsource, escaped, role,
                                                      lineno)
                return (string[:rolestart], nodelist,
                        string[textend:], messages)
        msg = self.reporter.warning(
            'Inline interpreted text or phrase reference start-string '
            'without end-string.', line=lineno)
        text = unescape(string[matchstart:matchend], True)
        prb = self.problematic(text, text, msg)
        return string[:matchstart], [prb], string[matchend:], [msg]
806
    def phrase_ref(self, before, after, rawsource, escaped, text=None):
        """
        Build nodes for a phrase reference, handling an embedded
        ``<URI>`` or ``<alias_>`` target if present.

        Return (before, node_list, after, []).
        """
        # `text` is ignored (since 0.16)
        match = self.patterns.embedded_link.search(escaped)
        if match:  # embedded <URI> or <alias_>
            # the reference text is everything before the embedded part:
            text = escaped[:match.start(0)]
            unescaped = unescape(text)
            rawtext = unescape(text, True)
            aliastext = match.group(2)
            rawaliastext = unescape(aliastext, True)
            underscore_escaped = rawaliastext.endswith(r'\_')
            if (aliastext.endswith('_')
                and not (underscore_escaped
                         or self.patterns.uri.match(aliastext))):
                aliastype = 'name'
                alias = normalize_name(unescape(aliastext[:-1]))
                target = nodes.target(match.group(1), refname=alias)
                target.indirect_reference_name = whitespace_normalize_name(
                    unescape(aliastext[:-1]))
            else:
                aliastype = 'uri'
                # remove unescaped whitespace
                alias_parts = split_escaped_whitespace(match.group(2))
                alias = ' '.join(''.join(part.split())
                                 for part in alias_parts)
                alias = self.adjust_uri(unescape(alias))
                if alias.endswith(r'\_'):
                    alias = alias[:-2] + '_'
                target = nodes.target(match.group(1), refuri=alias)
                target.referenced = 1
            if not aliastext:
                raise ApplicationError('problem with embedded link: %r'
                                       % aliastext)
            if not text:
                # no text before the embedded part: use the alias itself
                text = alias
                unescaped = unescape(text)
                rawtext = rawaliastext
        else:
            text = escaped
            unescaped = unescape(text)
            target = None
            rawtext = unescape(escaped, True)

        refname = normalize_name(unescaped)
        reference = nodes.reference(rawsource, text,
                                    name=whitespace_normalize_name(unescaped))
        reference[0].rawsource = rawtext

        node_list = [reference]

        if rawsource[-2:] == '__':  # anonymous reference
            if target and (aliastype == 'name'):
                reference['refname'] = alias
                self.document.note_refname(reference)
                # self.document.note_indirect_target(target) # required?
            elif target and (aliastype == 'uri'):
                reference['refuri'] = alias
            else:
                reference['anonymous'] = True
        else:
            if target:
                target['names'].append(refname)
                if aliastype == 'name':
                    reference['refname'] = alias
                    self.document.note_indirect_target(target)
                    self.document.note_refname(reference)
                else:
                    reference['refuri'] = alias
                    self.document.note_explicit_target(target, self.parent)
                    # target.note_referenced_by(name=refname)
                node_list.append(target)
            else:
                reference['refname'] = refname
                self.document.note_refname(reference)
        return before, node_list, after, []
881
882 def adjust_uri(self, uri):
883 match = self.patterns.email.match(uri)
884 if match:
885 return 'mailto:' + uri
886 else:
887 return uri
888
889 def interpreted(self, rawsource, text, role, lineno):
890 role_fn, messages = roles.role(role, self.language, lineno,
891 self.reporter)
892 if role_fn:
893 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
894 return nodes, messages + messages2
895 else:
896 msg = self.reporter.error(
897 'Unknown interpreted text role "%s".' % role,
898 line=lineno)
899 return ([self.problematic(rawsource, rawsource, msg)],
900 messages + [msg])
901
902 def literal(self, match, lineno):
903 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
904 match, lineno, self.patterns.literal, nodes.literal,
905 restore_backslashes=True)
906 return before, inlines, remaining, sysmessages
907
    def inline_internal_target(self, match, lineno):
        """Handle an inline internal target ("_`target name`")."""
        before, inlines, remaining, sysmessages, endstring = self.inline_obj(
            match, lineno, self.patterns.target, nodes.target)
        if inlines and isinstance(inlines[0], nodes.target):
            # Register the target with the document under its
            # normalized name.
            assert len(inlines) == 1
            target = inlines[0]
            name = normalize_name(target.astext())
            target['names'].append(name)
            self.document.note_explicit_target(target, self.parent)
        return before, inlines, remaining, sysmessages
918
    def substitution_reference(self, match, lineno):
        """Handle a substitution reference ("|name|", "|name|_", "|name|__")."""
        before, inlines, remaining, sysmessages, endstring = self.inline_obj(
            match, lineno, self.patterns.substitution_ref,
            nodes.substitution_reference)
        if len(inlines) == 1:
            subref_node = inlines[0]
            if isinstance(subref_node, nodes.substitution_reference):
                subref_text = subref_node.astext()
                self.document.note_substitution_ref(subref_node, subref_text)
                if endstring[-1:] == '_':
                    # A trailing "_"/"__" makes this also a reference: wrap
                    # the substitution_reference in a reference node.
                    reference_node = nodes.reference(
                        '|%s%s' % (subref_text, endstring), '')
                    if endstring[-2:] == '__':
                        reference_node['anonymous'] = True
                    else:
                        reference_node['refname'] = normalize_name(subref_text)
                        self.document.note_refname(reference_node)
                    reference_node += subref_node
                    inlines = [reference_node]
        return before, inlines, remaining, sysmessages
939
    def footnote_reference(self, match, lineno):
        """
        Handles `nodes.footnote_reference` and `nodes.citation_reference`
        elements.
        """
        label = match.group('footnotelabel')
        refname = normalize_name(label)
        string = match.string
        before = string[:match.start('whole')]
        remaining = string[match.end('whole'):]
        if match.group('citationlabel'):
            # Citation-style label: build a citation reference instead.
            refnode = nodes.citation_reference('[%s]_' % label,
                                               refname=refname)
            refnode += nodes.Text(label)
            self.document.note_citation_ref(refnode)
        else:
            refnode = nodes.footnote_reference('[%s]_' % label)
            if refname[0] == '#':
                # "[#]_" / "[#label]_": auto-numbered footnote.
                refname = refname[1:]
                refnode['auto'] = 1
                self.document.note_autofootnote_ref(refnode)
            elif refname == '*':
                # "[*]_": auto-symbol footnote.
                refname = ''
                refnode['auto'] = '*'
                self.document.note_symbol_footnote_ref(
                    refnode)
            else:
                # Manually labeled footnote: keep the label as text.
                refnode += nodes.Text(label)
            if refname:
                refnode['refname'] = refname
                self.document.note_footnote_ref(refnode)
        if utils.get_trim_footnote_ref_space(self.document.settings):
            # Optionally drop the space before the reference marker.
            before = before.rstrip()
        return before, [refnode], remaining, []
974
975 def reference(self, match, lineno, anonymous=False):
976 referencename = match.group('refname')
977 refname = normalize_name(referencename)
978 referencenode = nodes.reference(
979 referencename + match.group('refend'), referencename,
980 name=whitespace_normalize_name(referencename))
981 referencenode[0].rawsource = referencename
982 if anonymous:
983 referencenode['anonymous'] = True
984 else:
985 referencenode['refname'] = refname
986 self.document.note_refname(referencenode)
987 string = match.string
988 matchstart = match.start('whole')
989 matchend = match.end('whole')
990 return string[:matchstart], [referencenode], string[matchend:], []
991
992 def anonymous_reference(self, match, lineno):
993 return self.reference(match, lineno, anonymous=True)
994
    def standalone_uri(self, match, lineno):
        """Handle a plain URI or email address found in running text."""
        if (not match.group('scheme')
            or match.group('scheme').lower() in urischemes.schemes):
            if match.group('email'):
                # Bare email address: link via "mailto:".
                addscheme = 'mailto:'
            else:
                addscheme = ''
            text = match.group('whole')
            refuri = addscheme + unescape(text)
            reference = nodes.reference(unescape(text, True), text,
                                        refuri=refuri)
            return [reference]
        else:  # not a valid scheme
            raise MarkupMismatch
1009
    def pep_reference(self, match, lineno):
        """Turn a "PEP 287" or "pep-0287" match into a reference to the PEP."""
        text = match.group(0)
        if text.startswith('pep-'):
            pepnum = int(unescape(match.group('pepnum1')))
        elif text.startswith('PEP'):
            pepnum = int(unescape(match.group('pepnum2')))
        else:
            raise MarkupMismatch
        # URL built from the configured base URL and file-name template.
        ref = (self.document.settings.pep_base_url
               + self.document.settings.pep_file_url_template % pepnum)
        return [nodes.reference(unescape(text, True), text, refuri=ref)]
1021
    # File-name template, filled with the RFC number and appended to
    # ``settings.rfc_base_url`` by `rfc_reference` below.
    rfc_url = 'rfc%d.html'
1023
1024 def rfc_reference(self, match, lineno):
1025 text = match.group(0)
1026 if text.startswith('RFC'):
1027 rfcnum = int(unescape(match.group('rfcnum')))
1028 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
1029 else:
1030 raise MarkupMismatch
1031 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1032
    def implicit_inline(self, text, lineno):
        """
        Check each of the patterns in `self.implicit_dispatch` for a match,
        and dispatch to the stored method for the pattern. Recursively check
        the text before and after the match. Return a list of `nodes.Text`
        and inline element nodes.
        """
        if not text:
            return []
        for pattern, method in self.implicit_dispatch:
            match = pattern.search(text)
            if match:
                try:
                    # Must recurse on strings before *and* after the match;
                    # there may be multiple patterns.
                    return (self.implicit_inline(text[:match.start()], lineno)
                            + method(match, lineno)
                            + self.implicit_inline(text[match.end():], lineno))
                except MarkupMismatch:
                    # Handler rejected the match; try the other patterns.
                    pass
        return [nodes.Text(text)]
1054
    # Map each inline markup start-string to its parsing method.
    dispatch = {'*': emphasis,
                '**': strong,
                '`': interpreted_or_phrase_ref,
                '``': literal,
                '_`': inline_internal_target,
                ']_': footnote_reference,
                '|': substitution_reference,
                '_': reference,
                '__': anonymous_reference}
1064
1065
1066def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1067 return ord(s) - _zero
1068
1069
1070def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1071 return ord(s) - _zero
1072
1073
1074class Body(RSTState):
1075
1076 """
1077 Generic classifier of the first line of a block.
1078 """
1079
    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    enum = Struct()
    """Enumerated list parsing information."""

    # Per-format prefix/suffix and the slice (`start`:`end`) that extracts
    # the enumerator text from the matched marker.
    enum.formatinfo = {
        'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
        'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
        'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman']  # ORDERED!
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+'}
    # Callables mapping enumerator text to its ordinal value.
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': RomanNumeral.from_string,
                       'upperroman': RomanNumeral.from_string}

    # Anchored regexps used to validate enumerator text per sequence.
    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        enum.sequenceregexps[sequence] = re.compile(
            enum.sequencepats[sequence] + '$')

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table)."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    pats = {}
    """Fragments of patterns used by transitions."""

    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern? Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    # One named group per enumerator format (e.g. "(1)", "1)", "1."):
    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
            format, re.escape(enum.formatinfo[format].prefix),
            pats['enum'], re.escape(enum.formatinfo[format].suffix))

    # Transition patterns: the first line of each body construct.
    patterns = {
        'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
        'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
        'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
        'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
        'doctest': r'>>>( +|$)',
        'line_block': r'\|( +|$)',
        'grid_table_top': grid_table_top_pat,
        'simple_table_top': simple_table_top_pat,
        'explicit_markup': r'\.\.( +|$)',
        'anonymous': r'__( +|$)',
        'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
        'text': r''}
    # Transition names, tried in this order against each line.
    initial_transitions = (
        'bullet',
        'enumerator',
        'field_marker',
        'option_marker',
        'doctest',
        'line_block',
        'grid_table_top',
        'simple_table_top',
        'explicit_markup',
        'anonymous',
        'line',
        'text')
1165
1166 def indent(self, match, context, next_state):
1167 """Block quote."""
1168 (indented, indent, line_offset, blank_finish
1169 ) = self.state_machine.get_indented()
1170 elements = self.block_quote(indented, line_offset)
1171 self.parent += elements
1172 if not blank_finish:
1173 self.parent += self.unindent_warning('Block quote')
1174 return context, next_state, []
1175
    def block_quote(self, indented, line_offset):
        """
        Parse `indented` into block_quote elements (with attributions).

        `split_attribution` may leave remaining lines, producing several
        block quotes from one indented block.
        """
        elements = []
        while indented:
            blockquote = nodes.block_quote(rawsource='\n'.join(indented))
            (blockquote.source, blockquote.line
             ) = self.state_machine.get_source_and_line(line_offset+1)
            (blockquote_lines,
             attribution_lines,
             attribution_offset,
             indented,
             new_line_offset) = self.split_attribution(indented, line_offset)
            self.nested_parse(blockquote_lines, line_offset, blockquote)
            elements.append(blockquote)
            if attribution_lines:
                attribution, messages = self.parse_attribution(
                    attribution_lines, line_offset+attribution_offset)
                blockquote += attribution
                elements += messages
            line_offset = new_line_offset
            # Skip blank lines before any following block quote segment.
            while indented and not indented[0]:
                indented = indented[1:]
                line_offset += 1
        return elements
1199
    # U+2014 is an em-dash:
    attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')

    def split_attribution(self, indented, line_offset):
        """
        Check for a block quote attribution and split it off:

        * First line after a blank line must begin with a dash ("--", "---",
          em-dash; matches `self.attribution_pattern`).
        * Every line after that must have consistent indentation.
        * Attributions must be preceded by block quote content.

        Return a tuple of: (block quote content lines, attribution lines,
        attribution offset, remaining indented lines, remaining lines offset).
        """
        blank = None
        nonblank_seen = False
        for i in range(len(indented)):
            line = indented[i].rstrip()
            if line:
                if nonblank_seen and blank == i - 1:  # last line blank
                    match = self.attribution_pattern.match(line)
                    if match:
                        attribution_end, indent = self.check_attribution(
                            indented, i)
                        if attribution_end:
                            a_lines = indented[i:attribution_end]
                            # Strip the dash marker off the first line and
                            # the hanging indent off continuation lines.
                            a_lines.trim_left(match.end(), end=1)
                            a_lines.trim_left(indent, start=1)
                            return (indented[:i], a_lines,
                                    i, indented[attribution_end:],
                                    line_offset + attribution_end)
                nonblank_seen = True
            else:
                blank = i
        else:
            # No attribution found: everything is block quote content.
            return indented, None, None, None, None
1237
    def check_attribution(self, indented, attribution_start):
        """
        Check attribution shape.
        Return the index past the end of the attribution, and the indent.
        """
        indent = None
        # `i` needs a value in case the loop body never runs
        # (one-line attribution at the end of `indented`).
        i = attribution_start + 1
        for i in range(attribution_start + 1, len(indented)):
            line = indented[i].rstrip()
            if not line:
                # Blank line terminates the attribution.
                break
            if indent is None:
                indent = len(line) - len(line.lstrip())
            elif len(line) - len(line.lstrip()) != indent:
                # Inconsistent hanging indent.
                return None, None  # bad shape; not an attribution
        else:
            # return index of line after last attribution line:
            i += 1
        return i, (indent or 0)
1257
1258 def parse_attribution(self, indented, line_offset):
1259 text = '\n'.join(indented).rstrip()
1260 lineno = 1 + line_offset # line_offset is zero-based
1261 textnodes, messages = self.inline_text(text, lineno)
1262 node = nodes.attribution(text, '', *textnodes)
1263 node.source, node.line = self.state_machine.get_source_and_line(lineno)
1264 return node, messages
1265
    def bullet(self, match, context, next_state):
        """Bullet list item."""
        ul = nodes.bullet_list()
        ul.source, ul.line = self.state_machine.get_source_and_line()
        self.parent += ul
        ul['bullet'] = match.string[0]
        i, blank_finish = self.list_item(match.end())
        ul += i
        offset = self.state_machine.line_offset + 1  # next line
        # Parse subsequent items with the specialized `BulletList` state.
        new_line_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=ul, initial_state='BulletList',
            blank_finish=blank_finish)
        self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Bullet list')
        return [], next_state, []
1284
    def list_item(self, indent):
        """Parse one list item; return (list_item node, blank_finish)."""
        src, srcline = self.state_machine.get_source_and_line()
        if self.state_machine.line[indent:]:
            # Text follows the marker on the same line: indent is known.
            indented, line_offset, blank_finish = (
                self.state_machine.get_known_indented(indent))
        else:
            # Marker alone on its line: first indented line sets the indent.
            indented, indent, line_offset, blank_finish = (
                self.state_machine.get_first_known_indented(indent))
        listitem = nodes.list_item('\n'.join(indented))
        listitem.source, listitem.line = src, srcline
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=listitem)
        return listitem, blank_finish
1299
    def enumerator(self, match, context, next_state):
        """Enumerated List Item"""
        format, sequence, text, ordinal = self.parse_enumerator(match)
        if not self.is_enumerated_list_item(ordinal, sequence, format):
            # Not a valid list item after all: reparse the line as text.
            raise statemachine.TransitionCorrection('text')
        enumlist = nodes.enumerated_list()
        self.parent += enumlist
        if sequence == '#':
            enumlist['enumtype'] = 'arabic'
        else:
            enumlist['enumtype'] = sequence
        enumlist['prefix'] = self.enum.formatinfo[format].prefix
        enumlist['suffix'] = self.enum.formatinfo[format].suffix
        if ordinal != 1:
            enumlist['start'] = ordinal
            msg = self.reporter.info(
                'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
                % (text, ordinal))
            self.parent += msg
        listitem, blank_finish = self.list_item(match.end())
        enumlist += listitem
        offset = self.state_machine.line_offset + 1  # next line
        # Parse subsequent items with the specialized `EnumeratedList` state,
        # passing along the expected ordinal/format for validation.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=enumlist, initial_state='EnumeratedList',
            blank_finish=blank_finish,
            extra_settings={'lastordinal': ordinal,
                            'format': format,
                            'auto': sequence == '#'})
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Enumerated list')
        return [], next_state, []
1334
1335 def parse_enumerator(self, match, expected_sequence=None):
1336 """
1337 Analyze an enumerator and return the results.
1338
1339 :Return:
1340 - the enumerator format ('period', 'parens', or 'rparen'),
1341 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1342 - the text of the enumerator, stripped of formatting, and
1343 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1344 ``None`` is returned for invalid enumerator text).
1345
1346 The enumerator format has already been determined by the regular
1347 expression match. If `expected_sequence` is given, that sequence is
1348 tried first. If not, we check for Roman numeral 1. This way,
1349 single-character Roman numerals (which are also alphabetical) can be
1350 matched. If no sequence has been matched, all sequences are checked in
1351 order.
1352 """
1353 groupdict = match.groupdict()
1354 sequence = ''
1355 for format in self.enum.formats:
1356 if groupdict[format]: # was this the format matched?
1357 break # yes; keep `format`
1358 else: # shouldn't happen
1359 raise ParserError('enumerator format not matched')
1360 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1361 : self.enum.formatinfo[format].end]
1362 if text == '#':
1363 sequence = '#'
1364 elif expected_sequence:
1365 try:
1366 if self.enum.sequenceregexps[expected_sequence].match(text):
1367 sequence = expected_sequence
1368 except KeyError: # shouldn't happen
1369 raise ParserError('unknown enumerator sequence: %s'
1370 % sequence)
1371 elif text == 'i':
1372 sequence = 'lowerroman'
1373 elif text == 'I':
1374 sequence = 'upperroman'
1375 if not sequence:
1376 for sequence in self.enum.sequences:
1377 if self.enum.sequenceregexps[sequence].match(text):
1378 break
1379 else: # shouldn't happen
1380 raise ParserError('enumerator sequence not matched')
1381 if sequence == '#':
1382 ordinal = 1
1383 else:
1384 try:
1385 ordinal = int(self.enum.converters[sequence](text))
1386 except InvalidRomanNumeralError:
1387 ordinal = None
1388 return format, sequence, text, ordinal
1389
    def is_enumerated_list_item(self, ordinal, sequence, format):
        """
        Check validity based on the ordinal value and the second line.

        Return true if the ordinal is valid and the second line is blank,
        indented, or starts with the next enumerator or an auto-enumerator.
        """
        if ordinal is None:
            return None
        try:
            next_line = self.state_machine.next_line()
        except EOFError:  # end of input lines
            self.state_machine.previous_line()
            return 1
        else:
            # Restore position after peeking at the next line.
            self.state_machine.previous_line()
        if not next_line[:1].strip():  # blank or indented
            return 1
        result = self.make_enumerator(ordinal + 1, sequence, format)
        if result:
            next_enumerator, auto_enumerator = result
            try:
                if next_line.startswith((next_enumerator, auto_enumerator)):
                    return 1
            except TypeError:
                pass
        return None
1417
    def make_enumerator(self, ordinal, sequence, format):
        """
        Construct and return the next enumerated list item marker, and an
        auto-enumerator ("#" instead of the regular enumerator).

        Return ``None`` for invalid (out of range) ordinals.
        """
        if sequence == '#':
            enumerator = '#'
        elif sequence == 'arabic':
            enumerator = str(ordinal)
        else:
            if sequence.endswith('alpha'):
                if ordinal > 26:
                    # No alphabetic enumerator beyond 'z'/'Z'.
                    return None
                enumerator = chr(ordinal + ord('a') - 1)
            elif sequence.endswith('roman'):
                try:
                    enumerator = RomanNumeral(ordinal).to_uppercase()
                except TypeError:
                    # `RomanNumeral` rejected the value; treat as invalid.
                    return None
            else:  # shouldn't happen
                raise ParserError('unknown enumerator sequence: "%s"'
                                  % sequence)
            if sequence.startswith('lower'):
                enumerator = enumerator.lower()
            elif sequence.startswith('upper'):
                enumerator = enumerator.upper()
            else:  # shouldn't happen
                raise ParserError('unknown enumerator sequence: "%s"'
                                  % sequence)
        formatinfo = self.enum.formatinfo[format]
        next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
                           + ' ')
        auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
        return next_enumerator, auto_enumerator
1454
1455 def field_marker(self, match, context, next_state):
1456 """Field list item."""
1457 field_list = nodes.field_list()
1458 self.parent += field_list
1459 field, blank_finish = self.field(match)
1460 field_list += field
1461 offset = self.state_machine.line_offset + 1 # next line
1462 newline_offset, blank_finish = self.nested_list_parse(
1463 self.state_machine.input_lines[offset:],
1464 input_offset=self.state_machine.abs_line_offset() + 1,
1465 node=field_list, initial_state='FieldList',
1466 blank_finish=blank_finish)
1467 self.goto_line(newline_offset)
1468 if not blank_finish:
1469 self.parent += self.unindent_warning('Field list')
1470 return [], next_state, []
1471
    def field(self, match):
        """Parse one field; return (field node, blank_finish)."""
        name = self.parse_field_marker(match)
        src, srcline = self.state_machine.get_source_and_line()
        lineno = self.state_machine.abs_line_number()
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end())
        field_node = nodes.field()
        field_node.source = src
        field_node.line = srcline
        # The field name itself may contain inline markup.
        name_nodes, name_messages = self.inline_text(name, lineno)
        field_node += nodes.field_name(name, '', *name_nodes)
        field_body = nodes.field_body('\n'.join(indented), *name_messages)
        field_node += field_body
        if indented:
            self.parse_field_body(indented, line_offset, field_body)
        return field_node, blank_finish
1488
1489 def parse_field_marker(self, match):
1490 """Extract & return field name from a field marker match."""
1491 field = match.group()[1:] # strip off leading ':'
1492 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1493 return field
1494
1495 def parse_field_body(self, indented, offset, node) -> None:
1496 self.nested_parse(indented, input_offset=offset, node=node)
1497
    def option_marker(self, match, context, next_state):
        """Option list item."""
        optionlist = nodes.option_list()
        (optionlist.source, optionlist.line
         ) = self.state_machine.get_source_and_line()
        try:
            listitem, blank_finish = self.option_list_item(match)
        except MarkupError as error:
            # This shouldn't happen; pattern won't match.
            msg = self.reporter.error('Invalid option list marker: %s'
                                      % error)
            self.parent += msg
            # Recover by parsing the indented text as a block quote.
            (indented, indent, line_offset, blank_finish
             ) = self.state_machine.get_first_known_indented(match.end())
            elements = self.block_quote(indented, line_offset)
            self.parent += elements
            if not blank_finish:
                self.parent += self.unindent_warning('Option list')
            return [], next_state, []
        self.parent += optionlist
        optionlist += listitem
        offset = self.state_machine.line_offset + 1  # next line
        # Parse subsequent items with the specialized `OptionList` state.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=optionlist, initial_state='OptionList',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
1529
1530 def option_list_item(self, match):
1531 offset = self.state_machine.abs_line_offset()
1532 options = self.parse_option_marker(match)
1533 (indented, indent, line_offset, blank_finish
1534 ) = self.state_machine.get_first_known_indented(match.end())
1535 if not indented: # not an option list item
1536 self.goto_line(offset)
1537 raise statemachine.TransitionCorrection('text')
1538 option_group = nodes.option_group('', *options)
1539 description = nodes.description('\n'.join(indented))
1540 option_list_item = nodes.option_list_item('', option_group,
1541 description)
1542 if indented:
1543 self.nested_parse(indented, input_offset=line_offset,
1544 node=description)
1545 return option_list_item, blank_finish
1546
    def parse_option_marker(self, match):
        """
        Return a list of `node.option` and `node.option_argument` objects,
        parsed from an option marker match.

        :Exception: `MarkupError` for invalid option markers.
        """
        optlist = []
        # split at ", ", except inside < > (complex arguments)
        optionstrings = re.split(r', (?![^<]*>)', match.group().rstrip())
        for optionstring in optionstrings:
            tokens = optionstring.split()
            delimiter = ' '
            firstopt = tokens[0].split('=', 1)
            if len(firstopt) > 1:
                # "--opt=value" form
                tokens[:1] = firstopt
                delimiter = '='
            elif (len(tokens[0]) > 2
                  and ((tokens[0].startswith('-')
                        and not tokens[0].startswith('--'))
                       or tokens[0].startswith('+'))):
                # "-ovalue" form
                tokens[:1] = [tokens[0][:2], tokens[0][2:]]
                delimiter = ''
            if len(tokens) > 1 and (tokens[1].startswith('<')
                                    and tokens[-1].endswith('>')):
                # "-o <value1 value2>" form; join all values into one token
                tokens[1:] = [' '.join(tokens[1:])]
            # Exactly one option name, optionally with one argument.
            if 0 < len(tokens) <= 2:
                option = nodes.option(optionstring)
                option += nodes.option_string(tokens[0], tokens[0])
                if len(tokens) > 1:
                    option += nodes.option_argument(tokens[1], tokens[1],
                                                    delimiter=delimiter)
                optlist.append(option)
            else:
                raise MarkupError(
                    'wrong number of option tokens (=%s), should be 1 or 2: '
                    '"%s"' % (len(tokens), optionstring))
        return optlist
1588
1589 def doctest(self, match, context, next_state):
1590 line = self.document.current_line
1591 data = '\n'.join(self.state_machine.get_text_block())
1592 # TODO: Parse with `directives.body.CodeBlock` with
1593 # argument 'pycon' (Python Console) in Docutils 1.0.
1594 n = nodes.doctest_block(data, data)
1595 n.line = line
1596 self.parent += n
1597 return [], next_state, []
1598
    def line_block(self, match, context, next_state):
        """First line of a line block."""
        block = nodes.line_block()
        self.parent += block
        lineno = self.state_machine.abs_line_number()
        (block.source,
         block.line) = self.state_machine.get_source_and_line(lineno)
        line, messages, blank_finish = self.line_block_line(match, lineno)
        block += line
        self.parent += messages
        if not blank_finish:
            # Collect the rest of the block with the specialized
            # `LineBlock` state.
            offset = self.state_machine.line_offset + 1  # next line
            new_line_offset, blank_finish = self.nested_list_parse(
                self.state_machine.input_lines[offset:],
                input_offset=self.state_machine.abs_line_offset() + 1,
                node=block, initial_state='LineBlock',
                blank_finish=0)
            self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.reporter.warning(
                'Line block ends without a blank line.',
                line=lineno+1)
        if len(block):
            if block[0].indent is None:
                block[0].indent = 0
            self.nest_line_block_lines(block)
        return [], next_state, []
1626
    def line_block_line(self, match, lineno):
        """Return one line element of a line_block."""
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         until_blank=True)
        text = '\n'.join(indented)
        text_nodes, messages = self.inline_text(text, lineno)
        line = nodes.line(text, '', *text_nodes)
        (line.source,
         line.line) = self.state_machine.get_source_and_line(lineno)
        if match.string.rstrip() != '|':  # not empty
            # Record how far the text is indented past the "|" marker.
            line.indent = len(match.group(1)) - 1
        return line, messages, blank_finish
1640
1641 def nest_line_block_lines(self, block) -> None:
1642 for index in range(1, len(block)):
1643 if getattr(block[index], 'indent', None) is None:
1644 block[index].indent = block[index - 1].indent
1645 self.nest_line_block_segment(block)
1646
    def nest_line_block_segment(self, block) -> None:
        """Recursively wrap deeper-indented runs of lines in nested blocks."""
        indents = [item.indent for item in block]
        least = min(indents)
        new_items = []
        new_block = nodes.line_block()
        for item in block:
            if item.indent > least:
                # Deeper than the base indent: collect into a nested block.
                new_block.append(item)
            else:
                if len(new_block):
                    # Flush the pending nested block before this base line.
                    self.nest_line_block_segment(new_block)
                    new_items.append(new_block)
                    new_block = nodes.line_block()
                new_items.append(item)
        if len(new_block):
            # Flush a trailing nested block.
            self.nest_line_block_segment(new_block)
            new_items.append(new_block)
        block[:] = new_items
1665
1666 def grid_table_top(self, match, context, next_state):
1667 """Top border of a full table."""
1668 return self.table_top(match, context, next_state,
1669 self.isolate_grid_table,
1670 tableparser.GridTableParser)
1671
1672 def simple_table_top(self, match, context, next_state):
1673 """Top border of a simple table."""
1674 return self.table_top(match, context, next_state,
1675 self.isolate_simple_table,
1676 tableparser.SimpleTableParser)
1677
1678 def table_top(self, match, context, next_state,
1679 isolate_function, parser_class):
1680 """Top border of a generic table."""
1681 nodelist, blank_finish = self.table(isolate_function, parser_class)
1682 self.parent += nodelist
1683 if not blank_finish:
1684 msg = self.reporter.warning(
1685 'Blank line required after table.',
1686 line=self.state_machine.abs_line_number()+1)
1687 self.parent += msg
1688 return [], next_state, []
1689
    def table(self, isolate_function, parser_class):
        """Parse a table."""
        block, messages, blank_finish = isolate_function()
        if block:
            try:
                parser = parser_class()
                tabledata = parser.parse(block)
                # Absolute line number of the table's first line.
                tableline = (self.state_machine.abs_line_number() - len(block)
                             + 1)
                table = self.build_table(tabledata, tableline)
                nodelist = [table] + messages
            except tableparser.TableMarkupError as err:
                # Report the malformed table with the parser's detail.
                nodelist = self.malformed_table(block, ' '.join(err.args),
                                                offset=err.offset) + messages
        else:
            nodelist = messages
        return nodelist, blank_finish
1707
    def isolate_grid_table(self):
        """Extract a grid table; return (block, messages, blank_finish)."""
        messages = []
        blank_finish = 1
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            block, src, srcline = err.args
            messages.append(self.reporter.error('Unexpected indentation.',
                                                source=src, line=srcline))
            blank_finish = 0
        block.disconnect()
        # for East Asian chars:
        block.pad_double_width(self.double_width_pad_char)
        width = len(block[0].strip())
        for i in range(len(block)):
            block[i] = block[i].strip()
            if block[i][0] not in '+|':  # check left edge
                # Non-table line: back up and truncate the block here.
                blank_finish = 0
                self.state_machine.previous_line(len(block) - i)
                del block[i:]
                break
        if not self.grid_table_top_pat.match(block[-1]):  # find bottom
            blank_finish = 0
            # from second-last to third line of table:
            for i in range(len(block) - 2, 1, -1):
                if self.grid_table_top_pat.match(block[i]):
                    # Found a bottom border higher up; truncate below it.
                    self.state_machine.previous_line(len(block) - i + 1)
                    del block[i+1:]
                    break
            else:
                messages.extend(self.malformed_table(block))
                return [], messages, blank_finish
        for i in range(len(block)):  # check right edge
            if len(block[i]) != width or block[i][-1] not in '+|':
                messages.extend(self.malformed_table(block))
                return [], messages, blank_finish
        return block, messages, blank_finish
1745
    def isolate_simple_table(self):
        """Extract a simple table; return (block, messages, blank_finish)."""
        start = self.state_machine.line_offset
        lines = self.state_machine.input_lines
        limit = len(lines) - 1
        toplen = len(lines[start].strip())
        pattern_match = self.simple_table_border_pat.match
        found = 0
        found_at = None
        i = start + 1
        while i <= limit:
            line = lines[i]
            match = pattern_match(line)
            if match:
                if len(line.strip()) != toplen:
                    # Border width must match the top border.
                    self.state_machine.next_line(i - start)
                    messages = self.malformed_table(
                        lines[start:i+1], 'Bottom/header table border does '
                        'not match top border.')
                    return [], messages, i == limit or not lines[i+1].strip()
                found += 1
                found_at = i
                if found == 2 or i == limit or not lines[i+1].strip():
                    # Second border, or last border before end of input or
                    # a blank line: this is the table bottom.
                    end = i
                    break
            i += 1
        else:  # reached end of input_lines
            if found:
                extra = ' or no blank line after table bottom'
                self.state_machine.next_line(found_at - start)
                block = lines[start:found_at+1]
            else:
                extra = ''
                self.state_machine.next_line(i - start - 1)
                block = lines[start:]
            messages = self.malformed_table(
                block, 'No bottom table border found%s.' % extra)
            return [], messages, not extra
        self.state_machine.next_line(end - start)
        block = lines[start:end+1]
        # for East Asian chars:
        block.pad_double_width(self.double_width_pad_char)
        return block, [], end == limit or not lines[end+1].strip()
1788
1789 def malformed_table(self, block, detail='', offset=0):
1790 block.replace(self.double_width_pad_char, '')
1791 data = '\n'.join(block)
1792 message = 'Malformed table.'
1793 startline = self.state_machine.abs_line_number() - len(block) + 1
1794 if detail:
1795 message += '\n' + detail
1796 error = self.reporter.error(message, nodes.literal_block(data, data),
1797 line=startline+offset)
1798 return [error]
1799
1800 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
1801 colwidths, headrows, bodyrows = tabledata
1802 table = nodes.table()
1803 if widths == 'auto':
1804 table['classes'] += ['colwidths-auto']
1805 elif widths: # "grid" or list of integers
1806 table['classes'] += ['colwidths-given']
1807 tgroup = nodes.tgroup(cols=len(colwidths))
1808 table += tgroup
1809 for colwidth in colwidths:
1810 colspec = nodes.colspec(colwidth=colwidth)
1811 if stub_columns:
1812 colspec.attributes['stub'] = True
1813 stub_columns -= 1
1814 tgroup += colspec
1815 if headrows:
1816 thead = nodes.thead()
1817 tgroup += thead
1818 for row in headrows:
1819 thead += self.build_table_row(row, tableline)
1820 tbody = nodes.tbody()
1821 tgroup += tbody
1822 for row in bodyrows:
1823 tbody += self.build_table_row(row, tableline)
1824 return table
1825
1826 def build_table_row(self, rowdata, tableline):
1827 row = nodes.row()
1828 for cell in rowdata:
1829 if cell is None:
1830 continue
1831 morerows, morecols, offset, cellblock = cell
1832 attributes = {}
1833 if morerows:
1834 attributes['morerows'] = morerows
1835 if morecols:
1836 attributes['morecols'] = morecols
1837 entry = nodes.entry(**attributes)
1838 row += entry
1839 if ''.join(cellblock):
1840 self.nested_parse(cellblock, input_offset=tableline+offset,
1841 node=entry)
1842 return row
1843
    explicit = Struct()
    """Patterns and constants used for explicit markup recognition."""

    # Regexps applied to the text following the explicit-markup start
    # string (".. ").  Patterns are assembled with `Inliner` helper
    # fragments via %-interpolation.
    explicit.patterns = Struct(
        target=re.compile(r"""
                          (
                            _               # anonymous target
                          |               # *OR*
                            (?!_)           # no underscore at the beginning
                            (?P<quote>`?)   # optional open quote
                            (?![ `])        # first char. not space or
                                            # backquote
                            (?P<name>       # reference name
                              .+?
                            )
                            %(non_whitespace_escape_before)s
                            (?P=quote)      # close quote if open quote used
                          )
                          (?<!(?<!\x00):) # no unescaped colon at end
                          %(non_whitespace_escape_before)s
                          [ ]?            # optional space
                          :               # end of reference name
                          ([ ]+|$)        # followed by whitespace
                          """ % vars(Inliner), re.VERBOSE),
        reference=re.compile(r"""
                             (
                               (?P<simple>%(simplename)s)_
                             |               # *OR*
                               `               # open backquote
                               (?![ ])         # not space
                               (?P<phrase>.+?) # hyperlink phrase
                               %(non_whitespace_escape_before)s
                               `_              # close backquote,
                                               # reference mark
                             )
                             $               # end of string
                             """ % vars(Inliner), re.VERBOSE),
        substitution=re.compile(r"""
                                (
                                  (?![ ])       # first char. not space
                                  (?P<name>.+?) # substitution text
                                  %(non_whitespace_escape_before)s
                                  \|            # close delimiter
                                )
                                ([ ]+|$)        # followed by whitespace
                                """ % vars(Inliner),
                                re.VERBOSE),)
1891
    def footnote(self, match):
        """Parse a footnote (``.. [label] content``).

        Return a 2-tuple: ([footnote node], blank_finish flag).
        Handles manually numbered, auto-numbered ("#", "#label"), and
        auto-symbol ("*") footnote labels.
        """
        src, srcline = self.state_machine.get_source_and_line()
        (indented, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end())
        label = match.group(1)
        name = normalize_name(label)
        footnote = nodes.footnote('\n'.join(indented))
        footnote.source = src
        footnote.line = srcline
        if name[0] == '#':  # auto-numbered
            name = name[1:]  # autonumber label
            footnote['auto'] = 1
            if name:
                footnote['names'].append(name)
            self.document.note_autofootnote(footnote)
        elif name == '*':  # auto-symbol
            name = ''
            footnote['auto'] = '*'
            self.document.note_symbol_footnote(footnote)
        else:  # manually numbered
            footnote += nodes.label('', label)
            footnote['names'].append(name)
            self.document.note_footnote(footnote)
        if name:
            # Named footnotes are explicit hyperlink targets.
            self.document.note_explicit_target(footnote, footnote)
        else:
            self.document.set_id(footnote, footnote)
        if indented:
            self.nested_parse(indented, input_offset=offset, node=footnote)
        else:
            footnote += self.reporter.warning('Footnote content expected.')
        return [footnote], blank_finish
1924
1925 def citation(self, match):
1926 src, srcline = self.state_machine.get_source_and_line()
1927 (indented, indent, offset, blank_finish
1928 ) = self.state_machine.get_first_known_indented(match.end())
1929 label = match.group(1)
1930 name = normalize_name(label)
1931 citation = nodes.citation('\n'.join(indented))
1932 citation.source = src
1933 citation.line = srcline
1934 citation += nodes.label('', label)
1935 citation['names'].append(name)
1936 self.document.note_citation(citation)
1937 self.document.note_explicit_target(citation, citation)
1938 if indented:
1939 self.nested_parse(indented, input_offset=offset, node=citation)
1940 else:
1941 citation += self.reporter.warning('Citation content expected.')
1942 return [citation], blank_finish
1943
    def hyperlink_target(self, match):
        """Parse a hyperlink target (``.. _name: link``).

        Return a 2-tuple: ([target node], blank_finish flag).
        Raise `MarkupError` if the target pattern never matches.
        """
        pattern = self.explicit.patterns.target
        lineno = self.state_machine.abs_line_number()
        (block, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(
             match.end(), until_blank=True, strip_indent=False)
        blocktext = match.string[:match.end()] + '\n'.join(block)
        block = [escape2null(line) for line in block]
        escaped = block[0]
        blockindex = 0
        # Accumulate lines until the target pattern matches: the target
        # name (and its terminating colon) may span multiple lines.
        while True:
            targetmatch = pattern.match(escaped)
            if targetmatch:
                break
            blockindex += 1
            try:
                escaped += block[blockindex]
            except IndexError:
                raise MarkupError('malformed hyperlink target.')
        del block[:blockindex]
        # Strip the matched name/colon prefix from the first remaining
        # line, leaving only the link block.
        block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
        target = self.make_target(block, blocktext, lineno,
                                  targetmatch.group('name'))
        return [target], blank_finish
1968
1969 def make_target(self, block, block_text, lineno, target_name):
1970 target_type, data = self.parse_target(block, block_text, lineno)
1971 if target_type == 'refname':
1972 target = nodes.target(block_text, '', refname=normalize_name(data))
1973 target.indirect_reference_name = data
1974 self.add_target(target_name, '', target, lineno)
1975 self.document.note_indirect_target(target)
1976 return target
1977 elif target_type == 'refuri':
1978 target = nodes.target(block_text, '')
1979 self.add_target(target_name, data, target, lineno)
1980 return target
1981 else:
1982 return data
1983
1984 def parse_target(self, block, block_text, lineno):
1985 """
1986 Determine the type of reference of a target.
1987
1988 :Return: A 2-tuple, one of:
1989
1990 - 'refname' and the indirect reference name
1991 - 'refuri' and the URI
1992 - 'malformed' and a system_message node
1993 """
1994 if block and block[-1].strip()[-1:] == '_': # possible indirect target
1995 reference = ' '.join(line.strip() for line in block)
1996 refname = self.is_reference(reference)
1997 if refname:
1998 return 'refname', refname
1999 ref_parts = split_escaped_whitespace(' '.join(block))
2000 reference = ' '.join(''.join(unescape(part).split())
2001 for part in ref_parts)
2002 return 'refuri', reference
2003
2004 def is_reference(self, reference):
2005 match = self.explicit.patterns.reference.match(
2006 whitespace_normalize_name(reference))
2007 if not match:
2008 return None
2009 return unescape(match.group('simple') or match.group('phrase'))
2010
2011 def add_target(self, targetname, refuri, target, lineno):
2012 target.line = lineno
2013 if targetname:
2014 name = normalize_name(unescape(targetname))
2015 target['names'].append(name)
2016 if refuri:
2017 uri = self.inliner.adjust_uri(refuri)
2018 if uri:
2019 target['refuri'] = uri
2020 else:
2021 raise ApplicationError('problem with URI: %r' % refuri)
2022 self.document.note_explicit_target(target, self.parent)
2023 else: # anonymous target
2024 if refuri:
2025 target['refuri'] = refuri
2026 target['anonymous'] = True
2027 self.document.note_anonymous_target(target)
2028
    def substitution_def(self, match):
        """Parse a substitution definition (``.. |name| directive:: ...``).

        Return a 2-tuple: (node list, blank_finish flag).  The node list
        holds the substitution_definition node on success, or a system
        message on failure; non-inline children are hoisted to the
        parent.
        """
        pattern = self.explicit.patterns.substitution
        src, srcline = self.state_machine.get_source_and_line()
        (block, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         strip_indent=False)
        blocktext = (match.string[:match.end()] + '\n'.join(block))
        block.disconnect()
        escaped = escape2null(block[0].rstrip())
        blockindex = 0
        # Accumulate lines until the "|name|" marker pattern matches:
        # the substitution text may span multiple lines.
        while True:
            subdefmatch = pattern.match(escaped)
            if subdefmatch:
                break
            blockindex += 1
            try:
                escaped = escaped + ' ' + escape2null(
                    block[blockindex].strip())
            except IndexError:
                raise MarkupError('malformed substitution definition.')
        del block[:blockindex]  # strip out the substitution marker
        start = subdefmatch.end()-len(escaped)-1
        block[0] = (block[0].strip() + ' ')[start:-1]
        if not block[0]:
            del block[0]
            offset += 1
        while block and not block[-1].strip():
            block.pop()
        subname = subdefmatch.group('name')
        substitution_node = nodes.substitution_definition(blocktext)
        substitution_node.source = src
        substitution_node.line = srcline
        if not block:
            msg = self.reporter.warning(
                'Substitution definition "%s" missing contents.' % subname,
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            return [msg], blank_finish
        block[0] = block[0].strip()
        substitution_node['names'].append(
            nodes.whitespace_normalize_name(subname))
        new_abs_offset, blank_finish = self.nested_list_parse(
            block, input_offset=offset, node=substitution_node,
            initial_state='SubstitutionDef', blank_finish=blank_finish)
        i = 0
        # Hoist non-inline children (e.g. system messages) out of the
        # substitution definition, up to the parent.
        for node in substitution_node[:]:
            if not (isinstance(node, nodes.Inline)
                    or isinstance(node, nodes.Text)):
                self.parent += substitution_node[i]
                del substitution_node[i]
            else:
                i += 1
        for node in substitution_node.findall(nodes.Element):
            if self.disallowed_inside_substitution_definitions(node):
                pformat = nodes.literal_block('', node.pformat().rstrip())
                msg = self.reporter.error(
                    'Substitution definition contains illegal element <%s>:'
                    % node.tagname,
                    pformat, nodes.literal_block(blocktext, blocktext),
                    source=src, line=srcline)
                return [msg], blank_finish
        if len(substitution_node) == 0:
            msg = self.reporter.warning(
                'Substitution definition "%s" empty or invalid.' % subname,
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            return [msg], blank_finish
        self.document.note_substitution_def(
            substitution_node, subname, self.parent)
        return [substitution_node], blank_finish
2099
2100 def disallowed_inside_substitution_definitions(self, node) -> bool:
2101 if (node['ids']
2102 or isinstance(node, nodes.reference) and node.get('anonymous')
2103 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2104 return True
2105 else:
2106 return False
2107
2108 def directive(self, match, **option_presets):
2109 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2110 type_name = match.group(1)
2111 directive_class, messages = directives.directive(
2112 type_name, self.memo.language, self.document)
2113 self.parent += messages
2114 if directive_class:
2115 return self.run_directive(
2116 directive_class, match, type_name, option_presets)
2117 else:
2118 return self.unknown_directive(type_name)
2119
    def run_directive(self, directive, match, type_name, option_presets):
        """
        Parse a directive then run its directive function.

        Parameters:

        - `directive`: The class implementing the directive.  Must be
          a subclass of `rst.Directive`.

        - `match`: A regular expression match object which matched the first
          line of the directive.

        - `type_name`: The directive name, as used in the source text.

        - `option_presets`: A dictionary of preset options, defaults for the
          directive options.  Currently, only an "alt" option is passed by
          substitution definitions (value: the substitution name), which may
          be used by an embedded image directive.

        Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
        """
        # Legacy support: wrap plain directive functions in a class.
        if isinstance(directive, (FunctionType, MethodType)):
            from docutils.parsers.rst import convert_directive_function
            directive = convert_directive_function(directive)
        lineno = self.state_machine.abs_line_number()
        initial_line_offset = self.state_machine.line_offset
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         strip_top=0)
        block_text = '\n'.join(self.state_machine.input_lines[
            initial_line_offset : self.state_machine.line_offset + 1])  # noqa: E203,E501
        try:
            arguments, options, content, content_offset = (
                self.parse_directive_block(indented, line_offset,
                                           directive, option_presets))
        except MarkupError as detail:
            error = self.reporter.error(
                'Error in "%s" directive:\n%s.' % (type_name,
                                                   ' '.join(detail.args)),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error], blank_finish
        directive_instance = directive(
            type_name, arguments, options, content, lineno,
            content_offset, block_text, self, self.state_machine)
        try:
            result = directive_instance.run()
        except docutils.parsers.rst.DirectiveError as error:
            # Directives report problems via DirectiveError; convert to a
            # system message with the offending text attached.
            msg_node = self.reporter.system_message(error.level, error.msg,
                                                    line=lineno)
            msg_node += nodes.literal_block(block_text, block_text)
            result = [msg_node]
        assert isinstance(result, list), \
            'Directive "%s" must return a list of nodes.' % type_name
        for i in range(len(result)):
            assert isinstance(result[i], nodes.Node), \
                ('Directive "%s" returned non-Node object (index %s): %r'
                 % (type_name, i, result[i]))
        return (result,
                blank_finish or self.state_machine.is_next_line_blank())
2179
    def parse_directive_block(self, indented, line_offset, directive,
                              option_presets):
        """Split a directive's indented block into its parts.

        Return (arguments, options, content, content offset).
        Raise `MarkupError` on invalid options or disallowed content.
        """
        option_spec = directive.option_spec
        has_content = directive.has_content
        if indented and not indented[0].strip():
            indented.trim_start()
            line_offset += 1
        while indented and not indented[-1].strip():
            indented.trim_end()
        if indented and (directive.required_arguments
                         or directive.optional_arguments
                         or option_spec):
            # The first blank line separates arguments/options from the
            # directive content.
            for i, line in enumerate(indented):
                if not line.strip():
                    break
            else:
                i += 1
            arg_block = indented[:i]
            content = indented[i+1:]
            content_offset = line_offset + i + 1
        else:
            content = indented
            content_offset = line_offset
            arg_block = []
        if option_spec:
            options, arg_block = self.parse_directive_options(
                option_presets, option_spec, arg_block)
        else:
            options = {}
        if arg_block and not (directive.required_arguments
                              or directive.optional_arguments):
            # No arguments expected: the "argument block" is really the
            # start of the content.
            content = arg_block + indented[i:]
            content_offset = line_offset
            arg_block = []
        while content and not content[0].strip():
            content.trim_start()
            content_offset += 1
        if directive.required_arguments or directive.optional_arguments:
            arguments = self.parse_directive_arguments(
                directive, arg_block)
        else:
            arguments = []
        if content and not has_content:
            raise MarkupError('no content permitted')
        return arguments, options, content, content_offset
2225
2226 def parse_directive_options(self, option_presets, option_spec, arg_block):
2227 options = option_presets.copy()
2228 for i, line in enumerate(arg_block):
2229 if re.match(Body.patterns['field_marker'], line):
2230 opt_block = arg_block[i:]
2231 arg_block = arg_block[:i]
2232 break
2233 else:
2234 opt_block = []
2235 if opt_block:
2236 success, data = self.parse_extension_options(option_spec,
2237 opt_block)
2238 if success: # data is a dict of options
2239 options.update(data)
2240 else: # data is an error string
2241 raise MarkupError(data)
2242 return options, arg_block
2243
2244 def parse_directive_arguments(self, directive, arg_block):
2245 required = directive.required_arguments
2246 optional = directive.optional_arguments
2247 arg_text = '\n'.join(arg_block)
2248 arguments = arg_text.split()
2249 if len(arguments) < required:
2250 raise MarkupError('%s argument(s) required, %s supplied'
2251 % (required, len(arguments)))
2252 elif len(arguments) > required + optional:
2253 if directive.final_argument_whitespace:
2254 arguments = arg_text.split(None, required + optional - 1)
2255 else:
2256 raise MarkupError(
2257 'maximum %s argument(s) allowed, %s supplied'
2258 % (required + optional, len(arguments)))
2259 return arguments
2260
2261 def parse_extension_options(self, option_spec, datalines):
2262 """
2263 Parse `datalines` for a field list containing extension options
2264 matching `option_spec`.
2265
2266 :Parameters:
2267 - `option_spec`: a mapping of option name to conversion
2268 function, which should raise an exception on bad input.
2269 - `datalines`: a list of input strings.
2270
2271 :Return:
2272 - Success value, 1 or 0.
2273 - An option dictionary on success, an error string on failure.
2274 """
2275 node = nodes.field_list()
2276 newline_offset, blank_finish = self.nested_list_parse(
2277 datalines, 0, node, initial_state='ExtensionOptions',
2278 blank_finish=True)
2279 if newline_offset != len(datalines): # incomplete parse of block
2280 return 0, 'invalid option block'
2281 try:
2282 options = utils.extract_extension_options(node, option_spec)
2283 except KeyError as detail:
2284 return 0, 'unknown option: "%s"' % detail.args[0]
2285 except (ValueError, TypeError) as detail:
2286 return 0, 'invalid option value: %s' % ' '.join(detail.args)
2287 except utils.ExtensionOptionError as detail:
2288 return 0, 'invalid option data: %s' % ' '.join(detail.args)
2289 if blank_finish:
2290 return 1, options
2291 else:
2292 return 0, 'option data incompletely parsed'
2293
2294 def unknown_directive(self, type_name):
2295 lineno = self.state_machine.abs_line_number()
2296 (indented, indent, offset, blank_finish
2297 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2298 text = '\n'.join(indented)
2299 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2300 nodes.literal_block(text, text),
2301 line=lineno)
2302 return [error], blank_finish
2303
2304 def comment(self, match):
2305 if self.state_machine.is_next_line_blank():
2306 first_comment_line = match.string[match.end():]
2307 if not first_comment_line.strip(): # empty comment
2308 return [nodes.comment()], True # "A tiny but practical wart."
2309 if first_comment_line.startswith('end of inclusion from "'):
2310 # cf. parsers.rst.directives.misc.Include
2311 self.document.include_log.pop()
2312 return [], True
2313 (indented, indent, offset, blank_finish
2314 ) = self.state_machine.get_first_known_indented(match.end())
2315 while indented and not indented[-1].strip():
2316 indented.trim_end()
2317 text = '\n'.join(indented)
2318 return [nodes.comment(text, text)], blank_finish
2319
    # (transition method, pattern) pairs, tried in order by
    # `explicit_construct()`.
    explicit.constructs = [
        (footnote,
         re.compile(r"""
                    \.\.[ ]+          # explicit markup start
                    \[
                    (                 # footnote label:
                        [0-9]+          # manually numbered footnote
                      |               # *OR*
                        \#              # anonymous auto-numbered footnote
                      |               # *OR*
                        \#%s            # auto-number ed?) footnote label
                      |               # *OR*
                        \*              # auto-symbol footnote
                    )
                    \]
                    ([ ]+|$)          # whitespace or end of line
                    """ % Inliner.simplename, re.VERBOSE)),
        (citation,
         re.compile(r"""
                    \.\.[ ]+          # explicit markup start
                    \[(%s)\]          # citation label
                    ([ ]+|$)          # whitespace or end of line
                    """ % Inliner.simplename, re.VERBOSE)),
        (hyperlink_target,
         re.compile(r"""
                    \.\.[ ]+          # explicit markup start
                    _                 # target indicator
                    (?![ ]|$)         # first char. not space or EOL
                    """, re.VERBOSE)),
        (substitution_def,
         re.compile(r"""
                    \.\.[ ]+          # explicit markup start
                    \|                # substitution indicator
                    (?![ ]|$)         # first char. not space or EOL
                    """, re.VERBOSE)),
        (directive,
         re.compile(r"""
                    \.\.[ ]+          # explicit markup start
                    (%s)              # directive name
                    [ ]?              # optional space
                    ::                # directive delimiter
                    ([ ]+|$)          # whitespace or end of line
                    """ % Inliner.simplename, re.VERBOSE))]
2363
2364 def explicit_markup(self, match, context, next_state):
2365 """Footnotes, hyperlink targets, directives, comments."""
2366 nodelist, blank_finish = self.explicit_construct(match)
2367 self.parent += nodelist
2368 self.explicit_list(blank_finish)
2369 return [], next_state, []
2370
2371 def explicit_construct(self, match):
2372 """Determine which explicit construct this is, parse & return it."""
2373 errors = []
2374 for method, pattern in self.explicit.constructs:
2375 expmatch = pattern.match(match.string)
2376 if expmatch:
2377 try:
2378 return method(self, expmatch)
2379 except MarkupError as error:
2380 lineno = self.state_machine.abs_line_number()
2381 message = ' '.join(error.args)
2382 errors.append(self.reporter.warning(message, line=lineno))
2383 break
2384 nodelist, blank_finish = self.comment(match)
2385 return nodelist + errors, blank_finish
2386
2387 def explicit_list(self, blank_finish) -> None:
2388 """
2389 Create a nested state machine for a series of explicit markup
2390 constructs (including anonymous hyperlink targets).
2391 """
2392 offset = self.state_machine.line_offset + 1 # next line
2393 newline_offset, blank_finish = self.nested_list_parse(
2394 self.state_machine.input_lines[offset:],
2395 input_offset=self.state_machine.abs_line_offset() + 1,
2396 node=self.parent, initial_state='Explicit',
2397 blank_finish=blank_finish,
2398 match_titles=self.state_machine.match_titles)
2399 self.goto_line(newline_offset)
2400 if not blank_finish:
2401 self.parent += self.unindent_warning('Explicit markup')
2402
2403 def anonymous(self, match, context, next_state):
2404 """Anonymous hyperlink targets."""
2405 nodelist, blank_finish = self.anonymous_target(match)
2406 self.parent += nodelist
2407 self.explicit_list(blank_finish)
2408 return [], next_state, []
2409
2410 def anonymous_target(self, match):
2411 lineno = self.state_machine.abs_line_number()
2412 (block, indent, offset, blank_finish
2413 ) = self.state_machine.get_first_known_indented(match.end(),
2414 until_blank=True)
2415 blocktext = match.string[:match.end()] + '\n'.join(block)
2416 block = [escape2null(line) for line in block]
2417 target = self.make_target(block, blocktext, lineno, '')
2418 return [target], blank_finish
2419
2420 def line(self, match, context, next_state):
2421 """Section title overline or transition marker."""
2422 if self.state_machine.match_titles:
2423 return [match.string], 'Line', []
2424 elif match.string.strip() == '::':
2425 raise statemachine.TransitionCorrection('text')
2426 elif len(match.string.strip()) < 4:
2427 msg = self.reporter.info(
2428 'Unexpected possible title overline or transition.\n'
2429 "Treating it as ordinary text because it's so short.",
2430 line=self.state_machine.abs_line_number())
2431 self.parent += msg
2432 raise statemachine.TransitionCorrection('text')
2433 else:
2434 blocktext = self.state_machine.line
2435 msg = self.reporter.severe(
2436 'Unexpected section title or transition.',
2437 nodes.literal_block(blocktext, blocktext),
2438 line=self.state_machine.abs_line_number())
2439 self.parent += msg
2440 return [], next_state, []
2441
    def text(self, match, context, next_state):
        """Titles, definition lists, paragraphs."""
        # Pass the line as context; the `Text` state classifies it by
        # looking at the following line.
        return [match.string], 'Text', []
2445
2446
class RFC2822Body(Body):

    """
    RFC2822 headers are only valid as the first constructs in documents.  As
    soon as anything else appears, the `Body` state should take over.
    """

    patterns = Body.patterns.copy()  # can't modify the original
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    # All transitions lead back to `Body`; 'rfc2822' is tried just
    # before the catch-all 'text' transition.
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    initial_transitions.insert(-1, ('rfc2822', 'Body'))  # just before 'text'

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        fieldlist = nodes.field_list(classes=['rfc2822'])
        self.parent += fieldlist
        field, blank_finish = self.rfc2822_field(match)
        fieldlist += field
        offset = self.state_machine.line_offset + 1  # next line
        # Parse subsequent fields with the `RFC2822List` state.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=fieldlist, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """Parse one field; return (field node, blank_finish flag)."""
        name = match.string[:match.string.find(':')]
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         until_blank=True)
        fieldnode = nodes.field()
        fieldnode += nodes.field_name(name, name)
        fieldbody = nodes.field_body('\n'.join(indented))
        fieldnode += fieldbody
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=fieldbody)
        return fieldnode, blank_finish
2491
2492
class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.  Compound
    elements are lists and list-like constructs.

    All transition methods are disabled (redefined as `invalid_input`).
    Override individual methods in subclasses to re-enable.

    For example, once an initial bullet list item, say, is recognized, the
    `BulletList` subclass takes over, with a "bullet_list" node as its
    container.  Upon encountering the initial bullet list item, `Body.bullet`
    calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
    starts up a nested parsing session with `BulletList` as the initial state.
    Only the ``bullet`` transition method is enabled in `BulletList`; as long
    as only bullet list items are encountered, they are parsed and inserted
    into the container.  The first construct which is *not* a bullet list item
    triggers the `invalid_input` method, which ends the nested parse and
    closes the container.  `BulletList` needs to recognize input that is
    invalid in the context of a bullet list, which means everything *other
    than* bullet list items, so it inherits the transition list created in
    `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        self.state_machine.previous_line()  # back up so parent SM can reassess
        raise EOFError

    # Every transition is disabled by default; subclasses re-enable only
    # the transition(s) valid for their compound element.
    indent = invalid_input
    bullet = invalid_input
    enumerator = invalid_input
    field_marker = invalid_input
    option_marker = invalid_input
    doctest = invalid_input
    line_block = invalid_input
    grid_table_top = invalid_input
    simple_table_top = invalid_input
    explicit_markup = invalid_input
    anonymous = invalid_input
    line = invalid_input
    text = invalid_input
2534 text = invalid_input
2535
2536
class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Bullet list item."""
        # A different bullet character means a new list starts here.
        if match.string[0] != self.parent['bullet']:
            self.invalid_input()
        item, blank_finish = self.list_item(match.end())
        self.parent += item
        self.blank_finish = blank_finish
        return [], next_state, []
2550
2551
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """Definition lists."""
        # The `Definition` state checks the next line for indentation.
        return [match.string], 'Definition', []
2559
2560
class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """Enumerated list item."""
        format, sequence, text, ordinal = self.parse_enumerator(
            match, self.parent['enumtype'])
        # The item must continue the current list: same format, and
        # either the auto-enumerator "#" or the same sequence type with
        # the next ordinal (and no mixing with auto-enumeration).
        if (format != self.format
            or (sequence != '#' and (sequence != self.parent['enumtype']
                                     or self.auto
                                     or ordinal != (self.lastordinal + 1)))
            or not self.is_enumerated_list_item(ordinal, sequence, format)):
            # different enumeration: new list
            self.invalid_input()
        if sequence == '#':
            self.auto = 1
        listitem, blank_finish = self.list_item(match.end())
        self.parent += listitem
        self.blank_finish = blank_finish
        self.lastordinal = ordinal
        return [], next_state, []
2583
2584
class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Field list field."""
        field_node, blank_finish = self.field(match)
        self.parent += field_node
        self.blank_finish = blank_finish
        return [], next_state, []
2595
2596
class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """Option list item."""
        try:
            item, blank_finish = self.option_list_item(match)
        except MarkupError:
            # Not a valid option-list item after all: end this list.
            # (`invalid_input()` raises EOFError.)
            self.invalid_input()
        self.parent += item
        self.blank_finish = blank_finish
        return [], next_state, []
2610
2611
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    # Reuse RFC2822Body's transition patterns; everything else is
    # disabled via SpecializedBody.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        field, blank_finish = self.rfc2822_field(match)
        self.parent += field
        self.blank_finish = blank_finish
        # Remain in this state for subsequent fields.
        return [], 'RFC2822List', []

    blank = SpecializedBody.invalid_input
2627
2628
class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node) -> None:
        """Override `Body.parse_field_body` for simpler parsing."""
        # Split the body into blank-line-separated chunks; each chunk
        # becomes one plain paragraph (no inline markup processing).
        paragraph_lines = []
        for line in list(indented) + ['']:  # trailing '' flushes last chunk
            if line.strip():
                paragraph_lines.append(line)
            elif paragraph_lines:
                text = '\n'.join(paragraph_lines)
                node += nodes.paragraph(text, text)
                paragraph_lines = []
2647
2648
class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """New line of line block."""
        lineno = self.state_machine.abs_line_number()
        line_node, messages, blank_finish = self.line_block_line(match,
                                                                 lineno)
        self.parent += line_node
        # System messages go next to the line_block, not inside it.
        self.parent.parent += messages
        self.blank_finish = blank_finish
        return [], next_state, []
2663
2664
class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    # A blank line ends the series of explicit markup constructs:
    blank = SpecializedBody.invalid_input

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments."""
        node_list, finished = self.explicit_construct(match)
        self.parent += node_list
        self.blank_finish = finished
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets."""
        node_list, finished = self.anonymous_target(match)
        self.parent += node_list
        self.blank_finish = finished
        return [], next_state, []
2684
2685
class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
        # An embedded directive: "name::" at the start of the content.
        'embedded_directive': re.compile(r'(%s)::( +|$)'
                                         % Inliner.simplename),
        'text': r''}
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """Parse a directive embedded in the substitution definition."""
        # Pass the substitution name as the directive's `alt` value.
        nodelist, blank_finish = self.directive(match,
                                                alt=self.parent['names'][0])
        self.parent += nodelist
        if not self.state_machine.at_eof():
            self.blank_finish = blank_finish
        # The substitution definition content is consumed; end this
        # state machine.
        raise EOFError

    def text(self, match, context, next_state):
        # Not an embedded directive: record blank-line status and
        # terminate.  (Further handling of the text is left to the
        # caller.)
        if not self.state_machine.at_eof():
            self.blank_finish = self.state_machine.is_next_line_blank()
        raise EOFError
2710
2711
class Text(RSTState):

    """
    Classifier of second line of a text block.

    Could be a paragraph, a definition list item, or a title.
    """

    patterns = {'underline': Body.patterns['line'],
                'text': r''}
    initial_transitions = [('underline', 'Body'), ('text', 'Body')]

    def blank(self, match, context, next_state):
        """End of paragraph."""
        # NOTE: self.paragraph returns [node, system_message(s)], literalnext
        paragraph, literalnext = self.paragraph(
            context, self.state_machine.abs_line_number() - 1)
        self.parent += paragraph
        if literalnext:
            # The paragraph ended with "::"; parse the following block
            # as a literal block.
            self.parent += self.literal_block()
        return [], 'Body', []

    def eof(self, context):
        """End of input: flush any pending paragraph text."""
        if context:
            self.blank(None, context, None)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        dl = nodes.definition_list()
        # the definition list starts on the line before the indent:
        lineno = self.state_machine.abs_line_number() - 1
        dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
        dl_item, blank_finish = self.definition_list_item(context)
        dl += dl_item
        self.parent += dl
        offset = self.state_machine.line_offset + 1   # next line
        # Parse subsequent definition-list items with a nested state
        # machine starting in the `DefinitionList` state.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=dl, initial_state='DefinitionList',
            blank_finish=blank_finish, blank_finish_state='Definition')
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Definition list')
        return [], 'Body', []

    def underline(self, match, context, next_state):
        """Section title."""
        lineno = self.state_machine.abs_line_number()
        title = context[0].rstrip()
        underline = match.string.rstrip()
        source = title + '\n' + underline
        messages = []
        if column_width(title) > len(underline):
            if len(underline) < 4:
                if self.state_machine.match_titles:
                    msg = self.reporter.info(
                        'Possible title underline, too short for the title.\n'
                        "Treating it as ordinary text because it's so short.",
                        line=lineno)
                    self.parent += msg
                # Underline too short to count: reclassify as plain text.
                raise statemachine.TransitionCorrection('text')
            else:
                blocktext = context[0] + '\n' + self.state_machine.line
                msg = self.reporter.warning(
                    'Title underline too short.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                messages.append(msg)
        if not self.state_machine.match_titles:
            # Section titles are not allowed in this context.
            blocktext = context[0] + '\n' + self.state_machine.line
            # We need get_source_and_line() here to report correctly
            src, srcline = self.state_machine.get_source_and_line()
            # TODO: why is abs_line_number() == srcline+1
            # if the error is in a table (try with test_tables.py)?
            # print("get_source_and_line", srcline)
            # print("abs_line_number", self.state_machine.abs_line_number())
            msg = self.reporter.severe(
                'Unexpected section title.',
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            self.parent += messages
            self.parent += msg
            return [], next_state, []
        # The underline character identifies the section style:
        style = underline[0]
        context[:] = []
        self.section(title, source, style, lineno - 1, messages)
        return [], next_state, []

    def text(self, match, context, next_state):
        """Paragraph."""
        startline = self.state_machine.abs_line_number() - 1
        msg = None
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            block, src, srcline = err.args
            msg = self.reporter.error('Unexpected indentation.',
                                      source=src, line=srcline)
        lines = context + list(block)
        paragraph, literalnext = self.paragraph(lines, startline)
        self.parent += paragraph
        self.parent += msg
        if literalnext:
            try:
                # Advance one line (presumably past the blank line after
                # the "::") before parsing the literal block; verify.
                self.state_machine.next_line()
            except EOFError:
                pass
            self.parent += self.literal_block()
        return [], next_state, []

    def literal_block(self):
        """Return a list of nodes."""
        (indented, indent, offset, blank_finish
         ) = self.state_machine.get_indented()
        # Strip trailing blank lines from the indented block:
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            # No indented block found; try a quoted (unindented) one.
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        literal_block = nodes.literal_block(data, data)
        (literal_block.source,
         literal_block.line) = self.state_machine.get_source_and_line(offset+1)
        nodelist = [literal_block]
        if not blank_finish:
            nodelist.append(self.unindent_warning('Literal block'))
        return nodelist

    def quoted_literal_block(self):
        """Parse a quoted (unindented) literal block; return its nodes."""
        abs_line_offset = self.state_machine.abs_line_offset()
        offset = self.state_machine.line_offset
        parent_node = nodes.Element()
        # Nested parse using only the special-purpose QuotedLiteralBlock
        # state; titles are disabled inside the block.
        new_abs_offset = self.nested_parse(
            self.state_machine.input_lines[offset:],
            input_offset=abs_line_offset, node=parent_node, match_titles=False,
            state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
                                  'initial_state': 'QuotedLiteralBlock'})
        self.goto_line(new_abs_offset)
        return parent_node.children

    def definition_list_item(self, termline):
        """Parse one definition_list_item; return (node, blank_finish)."""
        # the parser is already on the second (indented) line:
        dd_lineno = self.state_machine.abs_line_number()
        dt_lineno = dd_lineno - 1
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_indented()
        dl_item = nodes.definition_list_item(
            '\n'.join(termline + list(indented)))
        (dl_item.source,
         dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
        dt_nodes, messages = self.term(termline, dt_lineno)
        dl_item += dt_nodes
        dd = nodes.definition('', *messages)
        dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
        dl_item += dd
        if termline[0][-2:] == '::':
            # A term ending in "::" was probably meant to start a
            # literal block; warn but parse as a definition anyway.
            dd += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.',
                line=dd_lineno)
        # TODO: drop a definition if it is an empty comment to allow
        # definition list items with several terms?
        # https://sourceforge.net/p/docutils/feature-requests/60/
        self.nested_parse(indented, input_offset=line_offset, node=dd)
        return dl_item, blank_finish

    # " : " (space-colon-space) separates the term from classifiers:
    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """Return a definition_list's term and optional classifiers."""
        assert len(lines) == 1
        text_nodes, messages = self.inline_text(lines[0], lineno)
        dt = nodes.term(lines[0])
        dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
        node_list = [dt]
        for i in range(len(text_nodes)):
            node = text_nodes[i]
            if isinstance(node, nodes.Text):
                parts = self.classifier_delimiter.split(node)
                if len(parts) == 1:
                    # No classifier delimiter in this chunk of text.
                    node_list[-1] += node
                else:
                    # Text before the first delimiter belongs to the
                    # current term/classifier; the rest are classifiers.
                    text = parts[0].rstrip()
                    textnode = nodes.Text(text)
                    node_list[-1] += textnode
                    node_list += [nodes.classifier(unescape(part, True), part)
                                  for part in parts[1:]]
            else:
                # Inline-markup nodes attach to the current element.
                node_list[-1] += node
        return node_list, messages
2903
2904
class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled. Override individual methods in
    subclasses to re-enable.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    def eof(self, context):
        """Incomplete construct."""
        return []

    # Disable every transition; subclasses re-enable them selectively.
    blank = invalid_input
    indent = invalid_input
    underline = invalid_input
    text = invalid_input
2926
2927
class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition."""
        # Back up two lines so the parent state machine can reassess.
        self.state_machine.previous_line(2)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        item, finished = self.definition_list_item(context)
        self.parent += item
        self.blank_finish = finished
        return [], 'DefinitionList', []
2943
2944
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1  # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = False
        elif len(marker) < 4:
            # Too short for a transition marker: reclassify as text.
            self.state_correction(context)
        if self.eofcheck:  # ignore EOFError with sections
            src, srcline = self.state_machine.get_source_and_line()
            # lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.source = src
            transition.line = srcline - 1
            # transition.line = lineno
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            # Too short for a transition marker: reclassify as text.
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """Potential over- & underlined title."""
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            # Input ended right after the title line: incomplete title.
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            # Third line is not a valid underline.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            # Overline and underline must be identical.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno)
                messages.append(msg)
        # Overline + underline characters together identify the style:
        style = (overline[0], underline[0])
        self.eofcheck = 0  # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text  # indented title

    def underline(self, match, context, next_state):
        # Two consecutive "line" lines: neither a title nor a transition.
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1) -> None:
        """Report a too-short overline and reclassify it as text."""
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """Back up `lines` lines and restart in Body's "text" transition."""
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
3073
3074
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose. Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False) -> None:
        RSTState.__init__(self, state_machine, debug)
        # System messages collected during the block, flushed in `eof`:
        self.messages = []
        # Absolute line number of the block's first line; set by
        # `initial_quoted`:
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        # A blank line ends the block (if any lines were collected).
        if context:
            raise EOFError
        else:
            return context, next_state, []

    def eof(self, context):
        """Build the literal_block node from the collected lines."""
        if context:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            literal_block = nodes.literal_block(text, text)
            literal_block.source = src
            literal_block.line = srcline
            self.parent += literal_block
        else:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number()
                )  # src not available, statemachine.input_lines is empty
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        # Indentation is not allowed inside a quoted literal block.
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        self.messages.append(
            self.reporter.error('Unexpected indentation.',
                                line=self.state_machine.abs_line_number()))
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote = match.string[0]
        pattern = re.compile(re.escape(quote))
        # New transition matches consistent quotes only:
        self.add_transition('quoted',
                            (pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        # A line starting with a different character ends the block.
        if context:
            self.messages.append(
                self.reporter.error('Inconsistent literal block quoting.',
                                    line=self.state_machine.abs_line_number()))
            self.state_machine.previous_line()
        raise EOFError
3148
3149
# `QuotedLiteralBlock` is intentionally absent: it is a special-purpose
# state, not for general use (see its docstring).
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""