Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/docutils/parsers/rst/states.py: 66%

1# $Id$

2# Author: David Goodger <goodger@python.org>

3# Copyright: This module has been placed in the public domain.

5"""

6This is the ``docutils.parsers.rst.states`` module, the core of

7the reStructuredText parser. It defines the following:

9:Classes:

10 - `RSTStateMachine`: reStructuredText parser's entry point.

11 - `NestedStateMachine`: recursive StateMachine.

12 - `RSTState`: reStructuredText State superclass.

13 - `Inliner`: For parsing inline markup.

14 - `Body`: Generic classifier of the first line of a block.

15 - `SpecializedBody`: Superclass for compound element members.

16 - `BulletList`: Second and subsequent bullet_list list_items

17 - `DefinitionList`: Second+ definition_list_items.

18 - `EnumeratedList`: Second+ enumerated_list list_items.

19 - `FieldList`: Second+ fields.

20 - `OptionList`: Second+ option_list_items.

21 - `RFC2822List`: Second+ RFC2822-style fields.

22 - `ExtensionOptions`: Parses directive option fields.

23 - `Explicit`: Second+ explicit markup constructs.

24 - `SubstitutionDef`: For embedded directives in substitution definitions.

25 - `Text`: Classifier of second line of a text block.

26 - `SpecializedText`: Superclass for continuation lines of Text-variants.

27 - `Definition`: Second line of potential definition_list_item.

28 - `Line`: Second line of overlined section title or transition marker.

29 - `Struct`: An auxiliary collection class.

31:Exception classes:

32 - `MarkupError`

33 - `ParserError`

34 - `MarkupMismatch`

36:Functions:

37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.

38 - `unescape()`: Return a string, nulls removed or restored to backslashes.

40:Attributes:

41 - `state_classes`: set of State classes used with `RSTStateMachine`.

43Parser Overview

44===============

46The reStructuredText parser is implemented as a recursive state machine,

47examining its input one line at a time. To understand how the parser works,

48please first become familiar with the `docutils.statemachine` module. In the

49description below, references are made to classes defined in this module;

50please see the individual classes for details.

52Parsing proceeds as follows:

541. The state machine examines each line of input, checking each of the

55 transition patterns of the state `Body`, in order, looking for a match.

56 The implicit transitions (blank lines and indentation) are checked before

57 any others. The 'text' transition is a catch-all (matches anything).

592. The method associated with the matched transition pattern is called.

61 A. Some transition methods are self-contained, appending elements to the

62 document tree (`Body.doctest` parses a doctest block). The parser's

63 current line index is advanced to the end of the element, and parsing

64 continues with step 1.

66 B. Other transition methods trigger the creation of a nested state machine,

67 whose job is to parse a compound construct ('indent' does a block quote,

68 'bullet' does a bullet list, 'overline' does a section [first checking

69 for a valid section header], etc.).

71 - In the case of lists and explicit markup, a one-off state machine is

72 created and run to parse contents of the first item.

74 - A new state machine is created and its initial state is set to the

75 appropriate specialized state (`BulletList` in the case of the

76 'bullet' transition; see `SpecializedBody` for more detail). This

77 state machine is run to parse the compound element (or series of

78 explicit markup elements), and returns as soon as a non-member element

79 is encountered. For example, the `BulletList` state machine ends as

80 soon as it encounters an element which is not a list item of that

81 bullet list. The optional omission of inter-element blank lines is

82 enabled by this nested state machine.

84 - The current line index is advanced to the end of the elements parsed,

85 and parsing continues with step 1.

87 C. The result of the 'text' transition depends on the next line of text.

88 The current state is changed to `Text`, under which the second line is

89 examined. If the second line is:

91 - Indented: The element is a definition list item, and parsing proceeds

92 similarly to step 2.B, using the `DefinitionList` state.

94 - A line of uniform punctuation characters: The element is a section

95 header; again, parsing proceeds as in step 2.B, and `Body` is still

96 used.

98 - Anything else: The element is a paragraph, which is examined for

99 inline markup and appended to the parent element. Processing

100 continues with step 1.

101"""

102

103__docformat__ = 'reStructuredText'

104

105

106import re

107from types import FunctionType, MethodType

108

109from docutils import nodes, statemachine, utils

110from docutils import ApplicationError, DataError

111from docutils.statemachine import StateMachineWS, StateWS

112from docutils.nodes import fully_normalize_name as normalize_name

113from docutils.nodes import unescape, whitespace_normalize_name

114import docutils.parsers.rst

115from docutils.parsers.rst import directives, languages, tableparser, roles

116from docutils.utils import escape2null, column_width

117from docutils.utils import punctuation_chars, roman, urischemes

118from docutils.utils import split_escaped_whitespace

119

120

class MarkupError(DataError):
    """`DataError` subclass; adds no behaviour of its own."""


class UnknownInterpretedRoleError(DataError):
    """`DataError` subclass; adds no behaviour of its own."""


class InterpretedRoleNotImplementedError(DataError):
    """`DataError` subclass; adds no behaviour of its own."""


class ParserError(ApplicationError):
    """`ApplicationError` subclass; adds no behaviour of its own."""


class MarkupMismatch(Exception):
    """Plain `Exception` subclass; adds no behaviour of its own."""

126

127

class Struct:

    """Attribute bag: every keyword argument becomes an instance attribute."""

    def __init__(self, **keywordargs):
        # Write the keywords straight into the instance namespace so that
        # they are reachable with dotted-attribute access.
        vars(self).update(keywordargs)

134

135

class RSTStateMachine(StateMachineWS):

    """
    reStructuredText's master StateMachine.

    Parsing starts with a call to the `run()` method.
    """

    def run(self, input_lines, document, input_offset=0, match_titles=True,
            inliner=None):
        """
        Parse `input_lines`, modifying the `document` node in place.

        Extends `StateMachineWS.run()`: the parse-global data (the "memo")
        is assembled first, then the state machine is run.  A fresh
        `Inliner` is created when `inliner` is None.  Nothing is returned.
        """
        settings = document.settings
        reporter = document.reporter
        self.language = languages.get_language(settings.language_code,
                                               reporter)
        self.match_titles = match_titles
        if inliner is None:
            inliner = Inliner()
        inliner.init_customizations(settings)
        # Data shared by this state machine and every nested one.
        memo = Struct(document=document,
                      reporter=reporter,
                      language=self.language,
                      title_styles=[],
                      section_level=0,
                      section_bubble_up_kludge=False,
                      inliner=inliner)
        self.memo = memo
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = memo.reporter
        self.node = document
        results = StateMachineWS.run(self, input_lines, input_offset,
                                     input_source=document['source'])
        assert results == [], 'RSTStateMachine.run() results should be empty!'
        # remove unneeded references
        self.node = None
        self.memo = None

173

174

class NestedStateMachine(StateMachineWS):

    """
    StateMachine that is run from inside another StateMachine's run, in
    order to parse nested document structures.
    """

    def run(self, input_lines, input_offset, memo, node, match_titles=True):
        """
        Parse `input_lines`, attaching the outcome to `node`.

        Extends `StateMachineWS.run()`: the document-wide data is taken from
        `memo` before the run starts.  Returns the (empty) result list.
        """
        document = memo.document
        self.match_titles = match_titles
        self.memo = memo
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = memo.reporter
        self.language = memo.language
        self.node = node
        results = StateMachineWS.run(self, input_lines, input_offset)
        assert results == [], ('NestedStateMachine.run() results should be '
                               'empty!')
        return results

199

200

class RSTState(StateWS):

    """
    reStructuredText State superclass.

    Contains methods used by all State subclasses.
    """

    nested_sm = NestedStateMachine
    # Class-level pool of default-configured nested state machines;
    # `nested_parse()` pops one before a run and puts it back afterwards.
    nested_sm_cache = []

    def __init__(self, state_machine, debug=False):
        # Default constructor arguments for nested state machines.
        self.nested_sm_kwargs = {'state_classes': state_classes,
                                 'initial_state': 'Body'}
        StateWS.__init__(self, state_machine, debug)

    def runtime_init(self):
        """Extend `StateWS.runtime_init()`: cache the memo's shared objects."""
        StateWS.runtime_init(self)
        memo = self.state_machine.memo
        self.memo = memo
        self.reporter = memo.reporter
        self.inliner = memo.inliner
        self.document = memo.document
        self.parent = self.state_machine.node
        # enable the reporter to determine source and source-line
        if not hasattr(self.reporter, 'get_source_and_line'):
            self.reporter.get_source_and_line = self.state_machine.get_source_and_line  # noqa:E501

    def goto_line(self, abs_line_offset):
        """
        Jump to input line `abs_line_offset`, ignoring jumps past the end.
        """
        try:
            self.state_machine.goto_line(abs_line_offset)
        except EOFError:
            # Deliberate best effort: a jump past the end is not an error.
            pass

    def no_match(self, context, transitions):
        """
        Override `StateWS.no_match` to generate a system message.

        This code should never be run.
        """
        self.reporter.severe(
            'Internal error: no transition pattern match.  State: "%s"; '
            'transitions: %s; context: %s; current line: %r.'
            % (self.__class__.__name__, transitions, context,
               self.state_machine.line))
        return context, None, []

    def bof(self, context):
        """Called at beginning of file."""
        return [], []

    def nested_parse(self, block, input_offset, node, match_titles=False,
                     state_machine_class=None, state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`.

        When neither `state_machine_class` nor `state_machine_kwargs` is
        given, a state machine from `nested_sm_cache` is reused if one is
        available and is returned to the cache after the run; otherwise the
        state machine is unlinked after the run.

        Return the nested state machine's absolute line offset.
        """
        # Only fully default machines are interchangeable, hence cacheable.
        use_cache = (state_machine_class is None
                     and state_machine_kwargs is None)
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
        block_length = len(block)

        state_machine = None
        if use_cache:
            try:
                state_machine = self.nested_sm_cache.pop()
            except IndexError:
                pass  # cache is empty; build a new machine below
        if not state_machine:
            state_machine = state_machine_class(debug=self.debug,
                                                **state_machine_kwargs)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        if use_cache:
            self.nested_sm_cache.append(state_machine)
        else:
            state_machine.unlink()
        new_offset = state_machine.abs_line_offset()
        # No `block.parent` implies disconnected -- lines aren't in sync:
        if block.parent and (len(block) - block_length) != 0:
            # Adjustment for block if modified in nested parse:
            self.state_machine.next_line(len(block) - block_length)
        return new_offset

    def nested_list_parse(self, block, input_offset, node, initial_state,
                          blank_finish,
                          blank_finish_state=None,
                          extra_settings=None,
                          match_titles=False,
                          state_machine_class=None,
                          state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`. Also keep track of optional intermediate blank lines and the
        required final one.

        `extra_settings` is an optional mapping of attribute names to values
        that are set on the `initial_state` state object before the run.
        `blank_finish_state` defaults to `initial_state`.

        Return a 2-tuple: the nested state machine's absolute line offset
        and the `blank_finish` value of the `blank_finish_state` state
        after the run.
        """
        if extra_settings is None:
            # Avoid a shared mutable default argument.
            extra_settings = {}
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
        # Always work on a copy: 'initial_state' is overwritten below and
        # neither the instance default nor a caller's dict must be modified.
        state_machine_kwargs = dict(state_machine_kwargs)
        state_machine_kwargs['initial_state'] = initial_state
        state_machine = state_machine_class(debug=self.debug,
                                            **state_machine_kwargs)
        if blank_finish_state is None:
            blank_finish_state = initial_state
        state_machine.states[blank_finish_state].blank_finish = blank_finish
        for key, value in extra_settings.items():
            setattr(state_machine.states[initial_state], key, value)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        blank_finish = state_machine.states[blank_finish_state].blank_finish
        state_machine.unlink()
        return state_machine.abs_line_offset(), blank_finish

    def section(self, title, source, style, lineno, messages):
        """Check for a valid subsection and create one if it checks out."""
        if self.check_subsection(source, style, lineno):
            self.new_subsection(title, lineno, messages)

    def check_subsection(self, source, style, lineno):
        """
        Check for a valid subsection header.  Return True or False.

        When a new section is reached that isn't a subsection of the current
        section, back up the line count (use ``previous_line(-x)``), then
        ``raise EOFError``.  The current StateMachine will finish, then the
        calling StateMachine can re-examine the title.  This will work its way
        back up the calling chain until the correct section level is reached.

        @@@ Alternative: Evaluate the title, store the title info & level, and
        back up the chain until that level is reached.  Store in memo? Or
        return in results?

        :Exception: `EOFError` when a sibling or supersection encountered.
        """
        memo = self.memo
        title_styles = memo.title_styles
        mylevel = memo.section_level
        try:                            # check for existing title style
            level = title_styles.index(style) + 1
        except ValueError:              # new title style
            if len(title_styles) == memo.section_level:  # new subsection
                title_styles.append(style)
                return True
            else:                       # not at lowest level
                self.parent += self.title_inconsistent(source, lineno)
                return False
        if level <= mylevel:            # sibling or supersection
            memo.section_level = level   # bubble up to parent section
            if len(style) == 2:
                memo.section_bubble_up_kludge = True
            # back up 2 lines for underline title, 3 for overline title
            self.state_machine.previous_line(len(style) + 1)
            raise EOFError              # let parent section re-evaluate
        if level == mylevel + 1:        # immediate subsection
            return True
        else:                           # invalid subsection
            self.parent += self.title_inconsistent(source, lineno)
            return False

    def title_inconsistent(self, sourcetext, lineno):
        """Return a severe system message quoting the offending title."""
        error = self.reporter.severe(
            'Title level inconsistent:', nodes.literal_block('', sourcetext),
            line=lineno)
        return error

    def new_subsection(self, title, lineno, messages):
        """Append new subsection to document tree. On return, check level."""
        memo = self.memo
        mylevel = memo.section_level
        memo.section_level += 1
        section_node = nodes.section()
        self.parent += section_node
        textnodes, title_messages = self.inline_text(title, lineno)
        titlenode = nodes.title(title, '', *textnodes)
        name = normalize_name(titlenode.astext())
        section_node['names'].append(name)
        section_node += titlenode
        section_node += messages
        section_node += title_messages
        self.document.note_implicit_target(section_node, section_node)
        # The section body starts on the line after the current one.
        offset = self.state_machine.line_offset + 1
        absoffset = self.state_machine.abs_line_offset() + 1
        newabsoffset = self.nested_parse(
            self.state_machine.input_lines[offset:], input_offset=absoffset,
            node=section_node, match_titles=True)
        self.goto_line(newabsoffset)
        if memo.section_level <= mylevel:  # can't handle next section?
            raise EOFError              # bubble up to supersection
        # reset section_level; next pass will detect it properly
        memo.section_level = mylevel

    def paragraph(self, lines, lineno):
        """
        Return a list (paragraph & messages) & a boolean: literal_block next?

        A text ending in an unescaped "::" announces a literal block: if the
        text consists of "::" only, no paragraph is produced; if "::" is
        preceded by whitespace it is removed; otherwise it is shortened to a
        single ":".
        """
        data = '\n'.join(lines).rstrip()
        if re.search(r'(?<!\\)(\\\\)*::$', data):
            if len(data) == 2:
                return [], 1
            elif data[-3] in ' \n':
                text = data[:-3].rstrip()
            else:
                text = data[:-1]
            literalnext = 1
        else:
            text = data
            literalnext = 0
        textnodes, messages = self.inline_text(text, lineno)
        p = nodes.paragraph(data, '', *textnodes)
        p.source, p.line = self.state_machine.get_source_and_line(lineno)
        return [p] + messages, literalnext

    def inline_text(self, text, lineno):
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.
        """
        # Local name chosen so that the `nodes` module is not shadowed.
        textnodes, messages = self.inliner.parse(text, lineno,
                                                 self.memo, self.parent)
        return textnodes, messages

    def unindent_warning(self, node_name):
        """Return a warning that `node_name` ended without a blank line."""
        # the actual problem is one line below the current line
        lineno = self.state_machine.abs_line_number() + 1
        return self.reporter.warning('%s ends without a blank line; '
                                     'unexpected unindent.' % node_name,
                                     line=lineno)

435

436

def build_regexp(definition, compile=True):
    """
    Assemble a regular expression from `definition` and return it.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.

    The result is a compiled pattern when `compile` is true, otherwise the
    pattern string.  Nested definitions are always expanded to strings.
    """
    name, prefix, suffix, parts = definition
    alternatives = '|'.join(
        build_regexp(part, None) if isinstance(part, tuple) else part
        for part in parts)
    pattern = '%s(?P<%s>%s)%s' % (prefix, name, alternatives, suffix)
    if not compile:
        return pattern
    return re.compile(pattern)

458

459

460class Inliner:

461

462 """

463 Parse inline markup; call the `parse()` method.

464 """

465

466 def __init__(self):

467 self.implicit_dispatch = []

468 """List of (pattern, bound method) tuples, used by

469 `self.implicit_inline`."""

470

471 def init_customizations(self, settings):

472 # lookahead and look-behind expressions for inline markup rules

473 if getattr(settings, 'character_level_inline_markup', False):

474 start_string_prefix = '(^|(?<!\x00))'

475 end_string_suffix = ''

476 else:

477 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %

478 (punctuation_chars.openers,

479 punctuation_chars.delimiters))

480 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %

481 (punctuation_chars.closing_delimiters,

482 punctuation_chars.delimiters,

483 punctuation_chars.closers))

484 args = locals().copy()

485 args.update(vars(self.__class__))

486

487 parts = ('initial_inline', start_string_prefix, '',

488 [

489 ('start', '', self.non_whitespace_after, # simple start-strings

490 [r'\*\*', # strong

491 r'\*(?!\*)', # emphasis but not strong

492 r'``', # literal

493 r'_`', # inline internal target

494 r'\|(?!\|)'] # substitution reference

495 ),

496 ('whole', '', end_string_suffix, # whole constructs

497 [ # reference name & end-string

498 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,

499 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',

500 [r'[0-9]+', # manually numbered

501 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)

502 r'\*', # auto-symbol

503 r'(?P<citationlabel>%s)' % self.simplename, # citation ref

504 ]

505 )

506 ]

507 ),

508 ('backquote', # interpreted text or phrase reference

509 '(?P<role>(:%s:)?)' % self.simplename, # optional role

510 self.non_whitespace_after,

511 ['`(?!`)'] # but not literal

512 )

513 ]

514 )

515 self.start_string_prefix = start_string_prefix

516 self.end_string_suffix = end_string_suffix

517 self.parts = parts

518

519 self.patterns = Struct(

520 initial=build_regexp(parts),

521 emphasis=re.compile(self.non_whitespace_escape_before

522 + r'(\*)' + end_string_suffix),

523 strong=re.compile(self.non_whitespace_escape_before

524 + r'(\*\*)' + end_string_suffix),

525 interpreted_or_phrase_ref=re.compile(

526 r"""

527 %(non_unescaped_whitespace_escape_before)s

528 (

529 `

530 (?P<suffix>

531 (?P<role>:%(simplename)s:)?

532 (?P<refend>__?)?

533 )

534 )

535 %(end_string_suffix)s

536 """ % args, re.VERBOSE),

537 embedded_link=re.compile(

538 r"""

539 (

540 (?:[ \n]+|^) # spaces or beginning of line/string

541 < # open bracket

542 %(non_whitespace_after)s

543 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets

544 %(non_whitespace_escape_before)s

545 > # close bracket

546 )

547 $ # end of string

548 """ % args, re.VERBOSE),

549 literal=re.compile(self.non_whitespace_before + '(``)'

550 + end_string_suffix),

551 target=re.compile(self.non_whitespace_escape_before

552 + r'(`)' + end_string_suffix),

553 substitution_ref=re.compile(self.non_whitespace_escape_before

554 + r'(\|_{0,2})'

555 + end_string_suffix),

556 email=re.compile(self.email_pattern % args + '$',

557 re.VERBOSE),

558 uri=re.compile(

559 (r"""

560 %(start_string_prefix)s

561 (?P<whole>

562 (?P<absolute> # absolute URI

563 (?P<scheme> # scheme (http, ftp, mailto)

564 [a-zA-Z][a-zA-Z0-9.+-]*

565 )

566 :

567 (

568 ( # either:

569 (//?)? # hierarchical URI

570 %(uric)s* # URI characters

571 %(uri_end)s # final URI char

572 )

573 ( # optional query

574 \?%(uric)s*

575 %(uri_end)s

576 )?

577 ( # optional fragment

578 \#%(uric)s*

579 %(uri_end)s

580 )?

581 )

582 )

583 | # *OR*

584 (?P<email> # email address

585 """ + self.email_pattern + r"""

586 )

587 )

588 %(end_string_suffix)s

589 """) % args, re.VERBOSE),

590 pep=re.compile(

591 r"""

592 %(start_string_prefix)s

593 (

594 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file

595 |

596 (PEP\s+(?P<pepnum2>\d+)) # reference by name

597 )

598 %(end_string_suffix)s""" % args, re.VERBOSE),

599 rfc=re.compile(

600 r"""

601 %(start_string_prefix)s

602 (RFC(-|\s+)?(?P<rfcnum>\d+))

603 %(end_string_suffix)s""" % args, re.VERBOSE))

604

605 self.implicit_dispatch.append((self.patterns.uri,

606 self.standalone_uri))

607 if settings.pep_references:

608 self.implicit_dispatch.append((self.patterns.pep,

609 self.pep_reference))

610 if settings.rfc_references:

611 self.implicit_dispatch.append((self.patterns.rfc,

612 self.rfc_reference))

613

614 def parse(self, text, lineno, memo, parent):

615 # Needs to be refactored for nested inline markup.

616 # Add nested_parse() method?

617 """

618 Return 2 lists: nodes (text and inline elements), and system_messages.

619

620 Using `self.patterns.initial`, a pattern which matches start-strings

621 (emphasis, strong, interpreted, phrase reference, literal,

622 substitution reference, and inline target) and complete constructs

623 (simple reference, footnote reference), search for a candidate. When

624 one is found, check for validity (e.g., not a quoted '*' character).

625 If valid, search for the corresponding end string if applicable, and

626 check it for validity. If not found or invalid, generate a warning

627 and ignore the start-string. Implicit inline markup (e.g. standalone

628 URIs) is found last.

629

630 :text: source string

631 :lineno: absolute line number (cf. statemachine.get_source_and_line())

632 """

633 self.reporter = memo.reporter

634 self.document = memo.document

635 self.language = memo.language

636 self.parent = parent

637 pattern_search = self.patterns.initial.search

638 dispatch = self.dispatch

639 remaining = escape2null(text)

640 processed = []

641 unprocessed = []

642 messages = []

643 while remaining:

644 match = pattern_search(remaining)

645 if match:

646 groups = match.groupdict()

647 method = dispatch[groups['start'] or groups['backquote']

648 or groups['refend'] or groups['fnend']]

649 before, inlines, remaining, sysmessages = method(self, match,

650 lineno)

651 unprocessed.append(before)

652 messages += sysmessages

653 if inlines:

654 processed += self.implicit_inline(''.join(unprocessed),

655 lineno)

656 processed += inlines

657 unprocessed = []

658 else:

659 break

660 remaining = ''.join(unprocessed) + remaining

661 if remaining:

662 processed += self.implicit_inline(remaining, lineno)

663 return processed, messages

664

665 # Inline object recognition

666 # -------------------------

667 # See also init_customizations().

668 non_whitespace_before = r'(?<!\s)'

669 non_whitespace_escape_before = r'(?<![\s\x00])'

670 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'

671 non_whitespace_after = r'(?!\s)'

672 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):

673 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'

674 # Valid URI characters (see RFC 2396 & RFC 2732);

675 # final \x00 allows backslash escapes in URIs:

676 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""

677 # Delimiter indicating the end of a URI (not part of the URI):

678 uri_end_delim = r"""[>]"""

679 # Last URI character; same as uric but no punctuation:

680 urilast = r"""[_~*/=+a-zA-Z0-9]"""

681 # End of a URI (either 'urilast' or 'uric followed by a

682 # uri_end_delim'):

683 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()

684 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""

685 email_pattern = r"""

686 %(emailc)s+(?:\.%(emailc)s+)* # name

687 (?<!\x00)@ # at

688 %(emailc)s+(?:\.%(emailc)s*)* # host

689 %(uri_end)s # final URI char

690 """

691

692 def quoted_start(self, match):

693 """Test if inline markup start-string is 'quoted'.

694

695 'Quoted' in this context means the start-string is enclosed in a pair

696 of matching opening/closing delimiters (not necessarily quotes)

697 or at the end of the match.

698 """

699 string = match.string

700 start = match.start()

701 if start == 0: # start-string at beginning of text

702 return False

703 prestart = string[start - 1]

704 try:

705 poststart = string[match.end()]

706 except IndexError: # start-string at end of text

707 return True # not "quoted" but no markup start-string either

708 return punctuation_chars.match_chars(prestart, poststart)

709

710 def inline_obj(self, match, lineno, end_pattern, nodeclass,

711 restore_backslashes=False):

712 string = match.string

713 matchstart = match.start('start')

714 matchend = match.end('start')

715 if self.quoted_start(match):

716 return string[:matchend], [], string[matchend:], [], ''

717 endmatch = end_pattern.search(string[matchend:])

718 if endmatch and endmatch.start(1): # 1 or more chars

719 text = endmatch.string[:endmatch.start(1)]

720 if restore_backslashes:

721 text = unescape(text, True)

722 textend = matchend + endmatch.end(1)

723 rawsource = unescape(string[matchstart:textend], True)

724 node = nodeclass(rawsource, text)

725 return (string[:matchstart], [node],

726 string[textend:], [], endmatch.group(1))

727 msg = self.reporter.warning(

728 'Inline %s start-string without end-string.'

729 % nodeclass.__name__, line=lineno)

730 text = unescape(string[matchstart:matchend], True)

731 prb = self.problematic(text, text, msg)

732 return string[:matchstart], [prb], string[matchend:], [msg], ''

733

734 def problematic(self, text, rawsource, message):

735 msgid = self.document.set_id(message, self.parent)

736 problematic = nodes.problematic(rawsource, text, refid=msgid)

737 prbid = self.document.set_id(problematic)

738 message.add_backref(prbid)

739 return problematic

740

741 def emphasis(self, match, lineno):

742 before, inlines, remaining, sysmessages, endstring = self.inline_obj(

743 match, lineno, self.patterns.emphasis, nodes.emphasis)

744 return before, inlines, remaining, sysmessages

745

746 def strong(self, match, lineno):

747 before, inlines, remaining, sysmessages, endstring = self.inline_obj(

748 match, lineno, self.patterns.strong, nodes.strong)

749 return before, inlines, remaining, sysmessages

750

751 def interpreted_or_phrase_ref(self, match, lineno):

752 end_pattern = self.patterns.interpreted_or_phrase_ref

753 string = match.string

754 matchstart = match.start('backquote')

755 matchend = match.end('backquote')

756 rolestart = match.start('role')

757 role = match.group('role')

758 position = ''

759 if role:

760 role = role[1:-1]

761 position = 'prefix'

762 elif self.quoted_start(match):

763 return string[:matchend], [], string[matchend:], []

764 endmatch = end_pattern.search(string[matchend:])

765 if endmatch and endmatch.start(1): # 1 or more chars

766 textend = matchend + endmatch.end()

767 if endmatch.group('role'):

768 if role:

769 msg = self.reporter.warning(

770 'Multiple roles in interpreted text (both '

771 'prefix and suffix present; only one allowed).',

772 line=lineno)

773 text = unescape(string[rolestart:textend], True)

774 prb = self.problematic(text, text, msg)

775 return string[:rolestart], [prb], string[textend:], [msg]

776 role = endmatch.group('suffix')[1:-1]

777 position = 'suffix'

778 escaped = endmatch.string[:endmatch.start(1)]

779 rawsource = unescape(string[matchstart:textend], True)

780 if rawsource[-1:] == '_':

781 if role:

782 msg = self.reporter.warning(

783 'Mismatch: both interpreted text role %s and '

784 'reference suffix.' % position, line=lineno)

785 text = unescape(string[rolestart:textend], True)

786 prb = self.problematic(text, text, msg)

787 return string[:rolestart], [prb], string[textend:], [msg]

788 return self.phrase_ref(string[:matchstart], string[textend:],

789 rawsource, escaped)

790 else:

791 rawsource = unescape(string[rolestart:textend], True)

792 nodelist, messages = self.interpreted(rawsource, escaped, role,

793 lineno)

794 return (string[:rolestart], nodelist,

795 string[textend:], messages)

796 msg = self.reporter.warning(

797 'Inline interpreted text or phrase reference start-string '

798 'without end-string.', line=lineno)

799 text = unescape(string[matchstart:matchend], True)

800 prb = self.problematic(text, text, msg)

801 return string[:matchstart], [prb], string[matchend:], [msg]

802

803 def phrase_ref(self, before, after, rawsource, escaped, text=None):

804 # `text` is ignored (since 0.16)

805 match = self.patterns.embedded_link.search(escaped)

806 if match: # embedded <URI> or <alias_>

807 text = escaped[:match.start(0)]

808 unescaped = unescape(text)

809 rawtext = unescape(text, True)

810 aliastext = match.group(2)

811 rawaliastext = unescape(aliastext, True)

812 underscore_escaped = rawaliastext.endswith(r'\_')

813 if (aliastext.endswith('_')

814 and not (underscore_escaped

815 or self.patterns.uri.match(aliastext))):

816 aliastype = 'name'

817 alias = normalize_name(unescape(aliastext[:-1]))

818 target = nodes.target(match.group(1), refname=alias)

819 target.indirect_reference_name = whitespace_normalize_name(

820 unescape(aliastext[:-1]))

821 else:

822 aliastype = 'uri'

823 # remove unescaped whitespace

824 alias_parts = split_escaped_whitespace(match.group(2))

825 alias = ' '.join(''.join(part.split())

826 for part in alias_parts)

827 alias = self.adjust_uri(unescape(alias))

828 if alias.endswith(r'\_'):

829 alias = alias[:-2] + '_'

830 target = nodes.target(match.group(1), refuri=alias)

831 target.referenced = 1

832 if not aliastext:

833 raise ApplicationError('problem with embedded link: %r'

834 % aliastext)

835 if not text:

836 text = alias

837 unescaped = unescape(text)

838 rawtext = rawaliastext

839 else:

840 text = escaped

841 unescaped = unescape(text)

842 target = None

843 rawtext = unescape(escaped, True)

844

845 refname = normalize_name(unescaped)

846 reference = nodes.reference(rawsource, text,

847 name=whitespace_normalize_name(unescaped))

848 reference[0].rawsource = rawtext

849

850 node_list = [reference]

851

852 if rawsource[-2:] == '__':

853 if target and (aliastype == 'name'):

854 reference['refname'] = alias

855 self.document.note_refname(reference)

856 # self.document.note_indirect_target(target) # required?

857 elif target and (aliastype == 'uri'):

858 reference['refuri'] = alias

859 else:

860 reference['anonymous'] = 1

861 else:

862 if target:

863 target['names'].append(refname)

864 if aliastype == 'name':

865 reference['refname'] = alias

866 self.document.note_indirect_target(target)

867 self.document.note_refname(reference)

868 else:

869 reference['refuri'] = alias

870 self.document.note_explicit_target(target, self.parent)

871 # target.note_referenced_by(name=refname)

872 node_list.append(target)

873 else:

874 reference['refname'] = refname

875 self.document.note_refname(reference)

876 return before, node_list, after, []

877

def adjust_uri(self, uri):
    """
    Return `uri`, prefixed with "mailto:" when `self.patterns.email`
    matches at its start; otherwise return it unchanged.
    """
    if self.patterns.email.match(uri):
        return 'mailto:' + uri
    return uri

884

def interpreted(self, rawsource, text, role, lineno):
    """
    Run the role function registered for `role` on interpreted text.

    Return a tuple ``(nodes, messages)``.  When `roles.role()` yields no
    role function, an error is reported and the raw source is returned
    wrapped in a "problematic" node.
    """
    role_fn, messages = roles.role(role, self.language, lineno,
                                   self.reporter)
    if not role_fn:
        msg = self.reporter.error(
            'Unknown interpreted text role "%s".' % role,
            line=lineno)
        return ([self.problematic(rawsource, rawsource, msg)],
                messages + [msg])
    # Local name chosen so the `nodes` module is not shadowed here.
    role_nodes, role_messages = role_fn(role, rawsource, text, lineno, self)
    return role_nodes, messages + role_messages

897

def literal(self, match, lineno):
    """
    Handle inline literal markup by delegating to `self.inline_obj()`.

    Backslashes are restored (``restore_backslashes=True``).  The end-string
    returned by `inline_obj()` is discarded; the remaining four values
    (text before, inline nodes, remaining text, system messages) are returned.
    """
    before, inlines, remaining, sysmessages, endstring = self.inline_obj(
        match, lineno, self.patterns.literal, nodes.literal,
        restore_backslashes=True)
    return before, inlines, remaining, sysmessages

903

def inline_internal_target(self, match, lineno):
    """
    Handle an inline internal target via `self.inline_obj()`.

    When the result is a `nodes.target`, its normalized text is appended to
    the node's ``names`` and the node is registered with the document as an
    explicit target.
    """
    before, inlines, remaining, sysmessages, _endstring = self.inline_obj(
        match, lineno, self.patterns.target, nodes.target)
    first = inlines[0] if inlines else None
    if isinstance(first, nodes.target):
        assert len(inlines) == 1
        first['names'].append(normalize_name(first.astext()))
        self.document.note_explicit_target(first, self.parent)
    return before, inlines, remaining, sysmessages

914

def substitution_reference(self, match, lineno):
    """
    Handle a substitution reference, optionally doubling as a hyperlink.

    A single `nodes.substitution_reference` result is noted with the
    document.  If the end-string finishes with "_", the node is wrapped in a
    `nodes.reference`: anonymous for "__", otherwise named after the
    normalized substitution text.
    """
    before, inlines, remaining, sysmessages, endstring = self.inline_obj(
        match, lineno, self.patterns.substitution_ref,
        nodes.substitution_reference)
    if (len(inlines) != 1
            or not isinstance(inlines[0], nodes.substitution_reference)):
        return before, inlines, remaining, sysmessages
    subref_node = inlines[0]
    subref_text = subref_node.astext()
    self.document.note_substitution_ref(subref_node, subref_text)
    if endstring[-1:] == '_':
        wrapper = nodes.reference('|%s%s' % (subref_text, endstring), '')
        if endstring[-2:] == '__':
            wrapper['anonymous'] = 1
        else:
            wrapper['refname'] = normalize_name(subref_text)
            self.document.note_refname(wrapper)
        wrapper += subref_node
        inlines = [wrapper]
    return before, inlines, remaining, sysmessages

935

def footnote_reference(self, match, lineno):
    """
    Handles `nodes.footnote_reference` and `nodes.citation_reference`
    elements.

    Returns ``(text before, [reference node], text after, [])``.
    """
    label = match.group('footnotelabel')
    refname = normalize_name(label)
    source = match.string
    before = source[:match.start('whole')]
    remaining = source[match.end('whole'):]
    rawsource = '[%s]_' % label
    if match.group('citationlabel'):
        refnode = nodes.citation_reference(rawsource, refname=refname)
        refnode += nodes.Text(label)
        self.document.note_citation_ref(refnode)
        return before, [refnode], remaining, []
    refnode = nodes.footnote_reference(rawsource)
    if refname[0] == '#':
        # auto-numbered footnote, possibly with a name after the "#"
        refname = refname[1:]
        refnode['auto'] = 1
        self.document.note_autofootnote_ref(refnode)
    elif refname == '*':
        # auto-symbol footnote
        refname = ''
        refnode['auto'] = '*'
        self.document.note_symbol_footnote_ref(refnode)
    else:
        refnode += nodes.Text(label)
    if refname:
        refnode['refname'] = refname
        self.document.note_footnote_ref(refnode)
    if utils.get_trim_footnote_ref_space(self.document.settings):
        before = before.rstrip()
    return before, [refnode], remaining, []

970

def reference(self, match, lineno, anonymous=False):
    """
    Build a `nodes.reference` from the "refname"/"refend" match groups.

    Anonymous references get ``anonymous=1``; named ones get a normalized
    ``refname`` and are noted with the document.  Returns
    ``(text before, [reference node], text after, [])``.
    """
    name_text = match.group('refname')
    refname = normalize_name(name_text)
    node = nodes.reference(
        name_text + match.group('refend'), name_text,
        name=whitespace_normalize_name(name_text))
    node[0].rawsource = name_text
    if anonymous:
        node['anonymous'] = 1
    else:
        node['refname'] = refname
        self.document.note_refname(node)
    source = match.string
    head = source[:match.start('whole')]
    tail = source[match.end('whole'):]
    return head, [node], tail, []

987

def anonymous_reference(self, match, lineno):
    """Handle an anonymous reference: `self.reference()` with ``anonymous=True``."""
    return self.reference(match, lineno, anonymous=True)

990

def standalone_uri(self, match, lineno):
    """
    Return a one-element list holding a `nodes.reference` for a bare URI.

    Raises `MarkupMismatch` when the match has a scheme that is not listed
    in `urischemes.schemes`.  Email matches get a "mailto:" prefix.
    """
    scheme = match.group('scheme')
    if scheme and scheme.lower() not in urischemes.schemes:
        raise MarkupMismatch    # not a valid scheme
    addscheme = 'mailto:' if match.group('email') else ''
    text = match.group('whole')
    refuri = addscheme + unescape(text)
    return [nodes.reference(unescape(text, True), text, refuri=refuri)]

1005

def pep_reference(self, match, lineno):
    """
    Return a one-element list holding a `nodes.reference` to a PEP.

    The number comes from group "pepnum1" for text starting with "pep-" and
    from "pepnum2" for text starting with "PEP"; anything else raises
    `MarkupMismatch`.
    """
    text = match.group(0)
    if text.startswith('pep-'):
        number_group = 'pepnum1'
    elif text.startswith('PEP'):
        number_group = 'pepnum2'
    else:
        raise MarkupMismatch
    pepnum = int(unescape(match.group(number_group)))
    settings = self.document.settings
    ref = settings.pep_base_url + settings.pep_file_url_template % pepnum
    return [nodes.reference(unescape(text, True), text, refuri=ref)]

1017

# Template for the RFC page name; `rfc_reference()` fills in the RFC number
# and appends the result to the `rfc_base_url` setting.
rfc_url = 'rfc%d.html'

1019

def rfc_reference(self, match, lineno):
    """
    Return a one-element list holding a `nodes.reference` to an RFC.

    Raises `MarkupMismatch` unless the matched text starts with "RFC".
    """
    text = match.group(0)
    if not text.startswith('RFC'):
        raise MarkupMismatch
    rfcnum = int(unescape(match.group('rfcnum')))
    ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
    return [nodes.reference(unescape(text, True), text, refuri=ref)]

1028

def implicit_inline(self, text, lineno):
    """
    Try each ``(pattern, method)`` pair of `self.implicit_dispatch` on
    `text`.  On a match, the text before and after it is processed
    recursively and the handler's nodes are placed in between.  A handler
    that raises `MarkupMismatch` is skipped.  Text without any accepted
    match becomes a single `nodes.Text`; empty text yields an empty list.
    """
    if not text:
        return []
    for pattern, handler in self.implicit_dispatch:
        found = pattern.search(text)
        if not found:
            continue
        try:
            # Must recurse on strings before *and* after the match;
            # there may be multiple patterns.
            leading = self.implicit_inline(text[:found.start()], lineno)
            middle = handler(found, lineno)
            trailing = self.implicit_inline(text[found.end():], lineno)
            return leading + middle + trailing
        except MarkupMismatch:
            pass
    return [nodes.Text(text)]

1050

# Maps inline markup start-strings to the handler functions defined in this
# class body.
# NOTE(review): the values are plain functions, not bound methods, so callers
# presumably pass the instance explicitly -- confirm at the call site.
dispatch = {'*': emphasis,
            '**': strong,
            '`': interpreted_or_phrase_ref,
            '``': literal,
            '_`': inline_internal_target,
            ']_': footnote_reference,
            '|': substitution_reference,
            '_': reference,
            '__': anonymous_reference}

1060

1061

def _loweralpha_to_int(s, _zero=(ord('a')-1)):
    """Return the code point of `s` counted from 'a' = 1 ('b' -> 2, ...)."""
    codepoint = ord(s)
    return codepoint - _zero

1064

1065

def _upperalpha_to_int(s, _zero=(ord('A')-1)):
    """Return the code point of `s` counted from 'A' = 1 ('B' -> 2, ...)."""
    codepoint = ord(s)
    return codepoint - _zero

1068

1069

def _lowerroman_to_int(s):
    """Convert `s` to an integer by upper-casing it for `roman.fromRoman()`."""
    return roman.fromRoman(s.upper())

1072

1073

class Body(RSTState):

    """
    Generic classifier of the first line of a block.
    """

    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    enum = Struct()
    """Enumerated list parsing information."""

    # Per-format prefix/suffix strings.  `start`/`end` are the slice bounds
    # `parse_enumerator()` uses to cut the bare enumerator text out of the
    # matched marker.
    enum.formatinfo = {
        'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
        'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
        'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    # `parse_enumerator()` tries these in list order when no sequence has
    # been determined yet, hence the ordering matters.
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman']  # ORDERED!
    # Regular expression fragment recognizing each sequence's enumerator text.
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+'}
    # Callable turning enumerator text into its ordinal, per sequence.
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': _lowerroman_to_int,
                       'upperroman': roman.fromRoman}

    # Compiled, end-anchored ('$') version of each sequence pattern.
    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        enum.sequenceregexps[sequence] = re.compile(
            enum.sequencepats[sequence] + '$')

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table)."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    pats = {}
    """Fragments of patterns used by transitions."""

    # Later entries are built by %-interpolating earlier ones, so the
    # assignment order below must be kept.
    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern?  Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    # One named group per enumerator format: escaped prefix + enumerator
    # pattern + escaped suffix.  `parse_enumerator()` reads these group names
    # back through `match.groupdict()`.
    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
            format, re.escape(enum.formatinfo[format].prefix),
            pats['enum'], re.escape(enum.formatinfo[format].suffix))

    # Transition name -> pattern (string or precompiled regular expression).
    patterns = {
        'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
        'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
        'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
        'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
        'doctest': r'>>>( +|$)',
        'line_block': r'\|( +|$)',
        'grid_table_top': grid_table_top_pat,
        'simple_table_top': simple_table_top_pat,
        'explicit_markup': r'\.\.( +|$)',
        'anonymous': r'__( +|$)',
        'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
        'text': r''}
    # NOTE(review): presumably the order in which the state machine tries the
    # transitions -- confirm against the state machine implementation.
    initial_transitions = (
        'bullet',
        'enumerator',
        'field_marker',
        'option_marker',
        'doctest',
        'line_block',
        'grid_table_top',
        'simple_table_top',
        'explicit_markup',
        'anonymous',
        'line',
        'text')

1165

def indent(self, match, context, next_state):
    """
    Block quote.

    The indented block is fetched from the state machine, converted by
    `self.block_quote()` and appended to the parent; a missing blank finish
    adds an unindent warning.  Returns ``(context, next_state, [])``.
    """
    fetched = self.state_machine.get_indented()
    indented, _indent, line_offset, blank_finish = fetched
    self.parent += self.block_quote(indented, line_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Block quote')
    return context, next_state, []

1175

def block_quote(self, indented, line_offset):
    """
    Turn `indented` lines into a list of `nodes.block_quote` elements.

    Each pass splits off one quote (plus optional attribution) with
    `self.split_attribution()`, parses the quote body, and continues with
    the lines that remain after skipping leading blank lines.  System
    messages from attribution parsing are added to the returned list.
    """
    elements = []
    while indented:
        quote = nodes.block_quote(rawsource='\n'.join(indented))
        quote.source, quote.line = (
            self.state_machine.get_source_and_line(line_offset+1))
        (quote_lines, attribution_lines, attribution_offset,
         indented, next_offset) = self.split_attribution(indented,
                                                         line_offset)
        self.nested_parse(quote_lines, line_offset, quote)
        elements.append(quote)
        if attribution_lines:
            attribution, messages = self.parse_attribution(
                attribution_lines, line_offset+attribution_offset)
            quote += attribution
            elements += messages
        line_offset = next_offset
        # drop blank lines in front of the next quote
        while indented and not indented[0]:
            indented = indented[1:]
            line_offset += 1
    return elements

1199

# Matches an attribution marker at the start of a line: "--" or "---" (not
# followed by another "-") or an em-dash, then optional spaces, and requires
# a following character that is neither a space nor a newline.
# U+2014 is an em-dash:
attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')

1202

def split_attribution(self, indented, line_offset):
    """
    Check for a block quote attribution and split it off:

    * First line after a blank line must begin with a dash ("--", "---",
      em-dash; matches `self.attribution_pattern`).
    * Every line after that must have consistent indentation.
    * Attributions must be preceded by block quote content.

    Return a tuple of: (block quote content lines, attribution lines,
    attribution offset, remaining indented lines, remaining lines offset).
    When no attribution is found the tuple is
    ``(indented, None, None, None, None)``.
    """
    last_blank = None
    content_seen = False
    for index in range(len(indented)):
        line = indented[index].rstrip()
        if not line:
            last_blank = index
            continue
        if content_seen and last_blank == index - 1:  # last line blank
            match = self.attribution_pattern.match(line)
            if match:
                attribution_end, indent = self.check_attribution(
                    indented, index)
                if attribution_end:
                    a_lines = indented[index:attribution_end]
                    # strip the marker from the first line and the common
                    # indent from the continuation lines
                    a_lines.trim_left(match.end(), end=1)
                    a_lines.trim_left(indent, start=1)
                    return (indented[:index], a_lines,
                            index, indented[attribution_end:],
                            line_offset + attribution_end)
        content_seen = True
    return indented, None, None, None, None

1237

def check_attribution(self, indented, attribution_start):
    """
    Check attribution shape.

    `indented` is the sequence of block quote lines and `attribution_start`
    the index of the first attribution line.  Continuation lines (up to the
    next blank line or the end of `indented`) must all share one indent.

    Return ``(end, indent)``: the index just past the last attribution line
    and the continuation indent (0 if there are no continuation lines), or
    ``(None, None)`` if the continuation lines are inconsistently indented.
    """
    indent = None
    for i in range(attribution_start + 1, len(indented)):
        line = indented[i].rstrip()
        if not line:
            # a blank line ends the attribution
            return i, (indent or 0)
        line_indent = len(line) - len(line.lstrip())
        if indent is None:
            indent = line_indent
        elif line_indent != indent:
            return None, None       # bad shape; not an attribution
    # Attribution runs to the end of the block.  This also covers a
    # one-line attribution on the last line, where the loop body never
    # runs: the end index must still be len(indented), not one beyond it.
    return len(indented), (indent or 0)

1257

def parse_attribution(self, indented, line_offset):
    """
    Build a `nodes.attribution` from the attribution lines.

    The lines are joined, right-stripped and parsed as inline text.
    Returns ``(attribution node, system messages)``.
    """
    lineno = 1 + line_offset        # line_offset is zero-based
    text = '\n'.join(indented).rstrip()
    textnodes, messages = self.inline_text(text, lineno)
    attribution = nodes.attribution(text, '', *textnodes)
    (attribution.source,
     attribution.line) = self.state_machine.get_source_and_line(lineno)
    return attribution, messages

1265

def bullet(self, match, context, next_state):
    """
    Bullet list item.

    Creates the `nodes.bullet_list`, parses the first item here and hands
    the following lines to a nested parse starting in state "BulletList".
    """
    machine = self.state_machine
    bullet_list = nodes.bullet_list()
    bullet_list.source, bullet_list.line = machine.get_source_and_line()
    self.parent += bullet_list
    bullet_list['bullet'] = match.string[0]
    first_item, blank_finish = self.list_item(match.end())
    bullet_list += first_item
    next_line = machine.line_offset + 1
    new_line_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[next_line:],
        input_offset=machine.abs_line_offset() + 1,
        node=bullet_list, initial_state='BulletList',
        blank_finish=blank_finish)
    self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Bullet list')
    return [], next_state, []

1284

def list_item(self, indent):
    """
    Parse one list item whose text starts at column `indent`.

    If the current line has text at `indent`, the known indent is used for
    the whole item; otherwise the state machine determines the indent from
    the first indented line.  Returns ``(list_item node, blank_finish)``.
    """
    machine = self.state_machine
    src, srcline = machine.get_source_and_line()
    if machine.line[indent:]:
        indented, line_offset, blank_finish = (
            machine.get_known_indented(indent))
    else:
        indented, indent, line_offset, blank_finish = (
            machine.get_first_known_indented(indent))
    item = nodes.list_item('\n'.join(indented))
    item.source, item.line = src, srcline
    if indented:
        self.nested_parse(indented, input_offset=line_offset, node=item)
    return item, blank_finish

1299

def enumerator(self, match, context, next_state):
    """
    Enumerated List Item

    Raises `statemachine.TransitionCorrection` ('text') when the line is not
    accepted by `self.is_enumerated_list_item()`.  Otherwise the list node is
    created, the first item parsed, and the remaining items are handled by a
    nested parse starting in state "EnumeratedList".
    """
    format, sequence, text, ordinal = self.parse_enumerator(match)
    if not self.is_enumerated_list_item(ordinal, sequence, format):
        raise statemachine.TransitionCorrection('text')
    enumlist = nodes.enumerated_list()
    self.parent += enumlist
    # auto-enumerated lists are recorded as arabic
    enumlist['enumtype'] = 'arabic' if sequence == '#' else sequence
    formatinfo = self.enum.formatinfo[format]
    enumlist['prefix'] = formatinfo.prefix
    enumlist['suffix'] = formatinfo.suffix
    if ordinal != 1:
        enumlist['start'] = ordinal
        self.parent += self.reporter.info(
            'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
            % (text, ordinal))
    first_item, blank_finish = self.list_item(match.end())
    enumlist += first_item
    machine = self.state_machine
    next_line = machine.line_offset + 1
    newline_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[next_line:],
        input_offset=machine.abs_line_offset() + 1,
        node=enumlist, initial_state='EnumeratedList',
        blank_finish=blank_finish,
        extra_settings={'lastordinal': ordinal,
                        'format': format,
                        'auto': sequence == '#'})
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Enumerated list')
    return [], next_state, []

1334

def parse_enumerator(self, match, expected_sequence=None):
    """
    Analyze an enumerator and return the results.

    :Return:
        - the enumerator format ('period', 'parens', or 'rparen'),
        - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
        - the text of the enumerator, stripped of formatting, and
        - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
          ``None`` is returned for invalid enumerator text).

    :Exception: `ParserError` if no enumerator format group matched, if
        `expected_sequence` is not a known sequence name, or if the text
        matches none of the known sequences.

    The enumerator format has already been determined by the regular
    expression match. If `expected_sequence` is given, that sequence is
    tried first. If not, we check for Roman numeral 1. This way,
    single-character Roman numerals (which are also alphabetical) can be
    matched. If no sequence has been matched, all sequences are checked in
    order.
    """
    groupdict = match.groupdict()
    sequence = ''
    for format in self.enum.formats:
        if groupdict[format]:  # was this the format matched?
            break              # yes; keep `format`
    else:                      # shouldn't happen
        raise ParserError('enumerator format not matched')
    # cut the prefix/suffix characters off the matched marker
    formatinfo = self.enum.formatinfo[format]
    text = groupdict[format][formatinfo.start:formatinfo.end]
    if text == '#':
        sequence = '#'
    elif expected_sequence:
        try:
            if self.enum.sequenceregexps[expected_sequence].match(text):
                sequence = expected_sequence
        except KeyError:       # shouldn't happen
            # Report the name that was actually looked up; `sequence` is
            # still empty at this point.
            raise ParserError('unknown enumerator sequence: %s'
                              % expected_sequence)
    elif text == 'i':
        sequence = 'lowerroman'
    elif text == 'I':
        sequence = 'upperroman'
    if not sequence:
        for sequence in self.enum.sequences:
            if self.enum.sequenceregexps[sequence].match(text):
                break
        else:                  # shouldn't happen
            raise ParserError('enumerator sequence not matched')
    if sequence == '#':
        ordinal = 1
    else:
        try:
            ordinal = self.enum.converters[sequence](text)
        except roman.InvalidRomanNumeralError:
            ordinal = None
    return format, sequence, text, ordinal

1389

def is_enumerated_list_item(self, ordinal, sequence, format):
    """
    Check validity based on the ordinal value and the second line.

    Return true (1) if the ordinal is valid and the second line is missing
    (end of input), blank, indented, or starts with the next enumerator or
    an auto-enumerator.  Return ``None`` otherwise.
    """
    if ordinal is None:
        return None
    machine = self.state_machine
    try:
        following = machine.next_line()
    except EOFError:            # end of input lines
        machine.previous_line()
        return 1
    # only peeking: step back to the enumerator line
    machine.previous_line()
    if not following[:1].strip():   # blank or indented
        return 1
    result = self.make_enumerator(ordinal + 1, sequence, format)
    if not result:
        return None
    next_enumerator, auto_enumerator = result
    try:
        if (following.startswith(next_enumerator)
                or following.startswith(auto_enumerator)):
            return 1
    except TypeError:
        pass
    return None

1418

def make_enumerator(self, ordinal, sequence, format):
    """
    Construct and return the next enumerated list item marker, and an
    auto-enumerator ("#" instead of the regular enumerator).  Both end
    with a single space.

    Return ``None`` for invalid (out of range) ordinals.
    """
    if sequence == '#':
        enumerator = '#'
    elif sequence == 'arabic':
        enumerator = str(ordinal)
    else:
        if sequence.endswith('alpha'):
            if ordinal > 26:
                return None
            enumerator = chr(ordinal + ord('a') - 1)
        elif sequence.endswith('roman'):
            try:
                enumerator = roman.toRoman(ordinal)
            except roman.RomanError:
                return None
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
        # apply the letter case demanded by the sequence name
        if sequence.startswith('lower'):
            enumerator = enumerator.lower()
        elif sequence.startswith('upper'):
            enumerator = enumerator.upper()
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
    info = self.enum.formatinfo[format]
    prefix, suffix = info.prefix, info.suffix
    return (prefix + enumerator + suffix + ' ',
            prefix + '#' + suffix + ' ')

1455

def field_marker(self, match, context, next_state):
    """
    Field list item.

    Creates the `nodes.field_list`, parses the first field here and hands
    the following lines to a nested parse starting in state "FieldList".
    """
    field_list = nodes.field_list()
    self.parent += field_list
    first_field, blank_finish = self.field(match)
    field_list += first_field
    machine = self.state_machine
    next_line = machine.line_offset + 1
    newline_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[next_line:],
        input_offset=machine.abs_line_offset() + 1,
        node=field_list, initial_state='FieldList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Field list')
    return [], next_state, []

1472

def field(self, match):
    """
    Build a `nodes.field` (name + body) for the field marker in `match`.

    The field name is parsed as inline text; system messages from that
    parse become the first children of the field body.  Returns
    ``(field node, blank_finish)``.
    """
    machine = self.state_machine
    name = self.parse_field_marker(match)
    src, srcline = machine.get_source_and_line()
    lineno = machine.abs_line_number()
    (indented, indent, line_offset,
     blank_finish) = machine.get_first_known_indented(match.end())
    field_node = nodes.field()
    field_node.source, field_node.line = src, srcline
    name_nodes, name_messages = self.inline_text(name, lineno)
    field_node += nodes.field_name(name, '', *name_nodes)
    body = nodes.field_body('\n'.join(indented), *name_messages)
    field_node += body
    if indented:
        self.parse_field_body(indented, line_offset, body)
    return field_node, blank_finish

1489

def parse_field_marker(self, match):
    """Extract & return field name from a field marker match."""
    after_colon = match.group()[1:]      # strip off leading ':'
    closing = after_colon.rfind(':')     # trailing ':' and what follows
    return after_colon[:closing]

1495

def parse_field_body(self, indented, offset, node):
    """Parse the `indented` field body lines into `node` via a nested parse."""
    self.nested_parse(indented, input_offset=offset, node=node)

1498

def option_marker(self, match, context, next_state):
    """
    Option list item.

    Parses the first item here and hands the following lines to a nested
    parse starting in state "OptionList".  If the marker turns out to be
    invalid (`MarkupError`), an error is reported and the indented text is
    treated as a block quote instead.
    """
    machine = self.state_machine
    optionlist = nodes.option_list()
    optionlist.source, optionlist.line = machine.get_source_and_line()
    try:
        first_item, blank_finish = self.option_list_item(match)
    except MarkupError as error:
        # This shouldn't happen; pattern won't match.
        self.parent += self.reporter.error(
            'Invalid option list marker: %s' % error)
        (indented, indent, line_offset,
         blank_finish) = machine.get_first_known_indented(match.end())
        self.parent += self.block_quote(indented, line_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
    self.parent += optionlist
    optionlist += first_item
    next_line = machine.line_offset + 1
    newline_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[next_line:],
        input_offset=machine.abs_line_offset() + 1,
        node=optionlist, initial_state='OptionList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Option list')
    return [], next_state, []

1530

def option_list_item(self, match):
    """
    Build a `nodes.option_list_item` (option group + description).

    Raises `statemachine.TransitionCorrection` ('text') after going back to
    the starting line when no description text follows the marker.
    Returns ``(option_list_item node, blank_finish)``.
    """
    start_offset = self.state_machine.abs_line_offset()
    options = self.parse_option_marker(match)
    (indented, indent, line_offset, blank_finish
     ) = self.state_machine.get_first_known_indented(match.end())
    if not indented:              # not an option list item
        self.goto_line(start_offset)
        raise statemachine.TransitionCorrection('text')
    description = nodes.description('\n'.join(indented))
    item = nodes.option_list_item(
        '', nodes.option_group('', *options), description)
    # `indented` is known to be non-empty here
    self.nested_parse(indented, input_offset=line_offset, node=description)
    return item, blank_finish

1547

def parse_option_marker(self, match):
    """
    Return a list of `node.option` and `node.option_argument` objects,
    parsed from an option marker match.

    :Exception: `MarkupError` for invalid option markers.
    """
    optlist = []
    # split at ", ", except inside < > (complex arguments)
    for optionstring in re.split(r', (?![^<]*>)', match.group().rstrip()):
        tokens = optionstring.split()
        delimiter = ' '
        head = tokens[0]
        name_and_value = head.split('=', 1)
        if len(name_and_value) > 1:
            # "--opt=value" form
            tokens[:1] = name_and_value
            delimiter = '='
        elif len(head) > 2 and (
                (head.startswith('-') and not head.startswith('--'))
                or head.startswith('+')):
            # "-ovalue" form
            tokens[:1] = [head[:2], head[2:]]
            delimiter = ''
        if (len(tokens) > 1 and tokens[1].startswith('<')
                and tokens[-1].endswith('>')):
            # "-o <value1 value2>" form; join all values into one token
            tokens[1:] = [' '.join(tokens[1:])]
        if not 0 < len(tokens) <= 2:
            raise MarkupError(
                'wrong number of option tokens (=%s), should be 1 or 2: '
                '"%s"' % (len(tokens), optionstring))
        option = nodes.option(optionstring)
        option += nodes.option_string(tokens[0], tokens[0])
        if len(tokens) > 1:
            option += nodes.option_argument(tokens[1], tokens[1],
                                            delimiter=delimiter)
        optlist.append(option)
    return optlist

1589

def doctest(self, match, context, next_state):
    """
    Doctest block: append the current text block, joined with newlines,
    to the parent as a `nodes.doctest_block`.
    """
    data = '\n'.join(self.state_machine.get_text_block())
    # TODO: prepend class value ['pycon'] (Python Console)
    #       parse with `directives.body.CodeBlock` (returns literal-block
    #       with class "code" and syntax highlight markup).
    self.parent += nodes.doctest_block(data, data)
    return [], next_state, []

1597

def line_block(self, match, context, next_state):
    """
    First line of a line block.

    Further lines are collected by a nested parse starting in state
    "LineBlock"; a warning is added when the block does not end with a
    blank line.  Finally the collected lines are nested by indentation.
    """
    machine = self.state_machine
    block = nodes.line_block()
    self.parent += block
    lineno = machine.abs_line_number()
    first_line, messages, blank_finish = self.line_block_line(match, lineno)
    block += first_line
    self.parent += messages
    if not blank_finish:
        next_line = machine.line_offset + 1
        new_line_offset, blank_finish = self.nested_list_parse(
            machine.input_lines[next_line:],
            input_offset=machine.abs_line_offset() + 1,
            node=block, initial_state='LineBlock',
            blank_finish=0)
        self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.reporter.warning(
            'Line block ends without a blank line.',
            line=lineno+1)
    if len(block):
        # the first line anchors the indentation of the following ones
        if block[0].indent is None:
            block[0].indent = 0
        self.nest_line_block_lines(block)
    return [], next_state, []

1623

def line_block_line(self, match, lineno):
    """
    Return one line element of a line_block.

    The result is ``(line node, system messages, blank_finish)``.  The
    node's ``indent`` is set from the width of match group 1 unless the
    source line consists of the "|" marker only.
    """
    fetched = self.state_machine.get_first_known_indented(
        match.end(), until_blank=True)
    indented, _indent, _line_offset, blank_finish = fetched
    text = '\n'.join(indented)
    text_nodes, messages = self.inline_text(text, lineno)
    line = nodes.line(text, '', *text_nodes)
    is_empty = match.string.rstrip() == '|'
    if not is_empty:
        line.indent = len(match.group(1)) - 1
    return line, messages, blank_finish

1635

def nest_line_block_lines(self, block):
    """
    Give every line without an ``indent`` the indent of its predecessor,
    then nest the lines with `self.nest_line_block_segment()`.
    """
    for position in range(1, len(block)):
        current = block[position]
        if getattr(current, 'indent', None) is None:
            current.indent = block[position - 1].indent
    self.nest_line_block_segment(block)

1641

def nest_line_block_segment(self, block):
    """
    Regroup the children of `block` in place by indentation.

    Children at the smallest indent stay direct children; each run of more
    deeply indented children is moved into a new `nodes.line_block`, which
    is nested recursively in the same way.
    """
    least = min(item.indent for item in block)
    regrouped = []
    pending = nodes.line_block()
    for item in block:
        if item.indent > least:
            pending.append(item)
            continue
        if len(pending):
            # a deeper run just ended; nest it and start a fresh one
            self.nest_line_block_segment(pending)
            regrouped.append(pending)
            pending = nodes.line_block()
        regrouped.append(item)
    if len(pending):
        self.nest_line_block_segment(pending)
        regrouped.append(pending)
    block[:] = regrouped

1660

def grid_table_top(self, match, context, next_state):
    """
    Top border of a full table.

    Delegates to `self.table_top()` with the grid table isolation method
    and `tableparser.GridTableParser`.
    """
    return self.table_top(match, context, next_state,
                          self.isolate_grid_table,
                          tableparser.GridTableParser)

1666

def simple_table_top(self, match, context, next_state):
    """
    Top border of a simple table.

    Delegates to `self.table_top()` with the simple table isolation method
    and `tableparser.SimpleTableParser`.
    """
    return self.table_top(match, context, next_state,
                          self.isolate_simple_table,
                          tableparser.SimpleTableParser)

1672

def table_top(self, match, context, next_state,
              isolate_function, parser_class):
    """
    Top border of a generic table.

    The nodes produced by `self.table()` are appended to the parent; a
    warning follows when the table is not followed by a blank line.
    Returns ``([], next_state, [])``.
    """
    nodelist, blank_finish = self.table(isolate_function, parser_class)
    self.parent += nodelist
    if blank_finish:
        return [], next_state, []
    self.parent += self.reporter.warning(
        'Blank line required after table.',
        line=self.state_machine.abs_line_number()+1)
    return [], next_state, []

1684

def table(self, isolate_function, parser_class):
    """
    Parse a table.

    `isolate_function` returns ``(block, messages, blank_finish)``.  A
    non-empty block is parsed with a `parser_class` instance and turned
    into a table node; a `tableparser.TableMarkupError` yields the
    "malformed table" nodes instead.  Returns ``(node list, blank_finish)``
    with `messages` at the end of the list.
    """
    block, messages, blank_finish = isolate_function()
    if not block:
        return messages, blank_finish
    try:
        tabledata = parser_class().parse(block)
        # line number of the table's first line
        first_line = (self.state_machine.abs_line_number() - len(block)
                      + 1)
        table_node = self.build_table(tabledata, first_line)
        nodelist = [table_node] + messages
    except tableparser.TableMarkupError as err:
        nodelist = self.malformed_table(block, ' '.join(err.args),
                                        offset=err.offset) + messages
    return nodelist, blank_finish

1702

def isolate_grid_table(self):
    """
    Extract the lines of a grid table from the state machine's input.

    Return ``(block, messages, blank_finish)``: the stripped table lines
    (an empty list if the table is malformed), system messages collected on
    the way, and a flag that is cleared (0) whenever the text block had to be
    cut short or was unexpectedly indented.
    """
    messages = []
    blank_finish = 1
    try:
        block = self.state_machine.get_text_block(flush_left=True)
    except statemachine.UnexpectedIndentationError as err:
        # the exception carries the lines read so far plus their location
        block, src, srcline = err.args
        messages.append(self.reporter.error('Unexpected indentation.',
                                            source=src, line=srcline))
        blank_finish = 0
    block.disconnect()
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    # every row is later required to be as wide as the top border
    width = len(block[0].strip())
    for i in range(len(block)):
        block[i] = block[i].strip()
        if block[i][0] not in '+|':  # check left edge
            # Lines from here on are not table rows: drop them.
            # NOTE(review): `previous_line(n)` presumably hands the dropped
            # lines back to the state machine -- confirm.
            blank_finish = 0
            self.state_machine.previous_line(len(block) - i)
            del block[i:]
            break
    if not self.grid_table_top_pat.match(block[-1]):  # find bottom
        blank_finish = 0
        # from second-last to third line of table:
        for i in range(len(block) - 2, 1, -1):
            if self.grid_table_top_pat.match(block[i]):
                # last border-like line found: cut the block after it
                self.state_machine.previous_line(len(block) - i + 1)
                del block[i+1:]
                break
        else:
            # no bottom border anywhere
            messages.extend(self.malformed_table(block))
            return [], messages, blank_finish
    for i in range(len(block)):  # check right edge
        if len(block[i]) != width or block[i][-1] not in '+|':
            messages.extend(self.malformed_table(block))
            return [], messages, blank_finish
    return block, messages, blank_finish

1740

def isolate_simple_table(self):
    """Extract the text block of a simple table from the input.

    Scans forward from the current line for border lines matching
    `self.simple_table_border_pat`.

    Returns a 3-tuple ``(block, messages, blank_finish)``; `block` is
    an empty list when a "malformed table" message was generated.
    """
    start = self.state_machine.line_offset
    lines = self.state_machine.input_lines
    limit = len(lines) - 1
    # Length of the (stripped) top border; other borders must match it.
    toplen = len(lines[start].strip())
    pattern_match = self.simple_table_border_pat.match
    found = 0            # number of border lines found after the top
    found_at = None      # index of the most recent border line
    i = start + 1
    while i <= limit:
        line = lines[i]
        match = pattern_match(line)
        if match:
            if len(line.strip()) != toplen:
                self.state_machine.next_line(i - start)
                messages = self.malformed_table(
                    lines[start:i+1], 'Bottom/header table border does '
                    'not match top border.')
                return [], messages, i == limit or not lines[i+1].strip()
            found += 1
            found_at = i
            # Stop at the second border, at the end of input, or at a
            # border that is followed by a blank line.
            if found == 2 or i == limit or not lines[i+1].strip():
                end = i
                break
        i += 1
    else:  # reached end of input_lines
        if found:
            extra = ' or no blank line after table bottom'
            self.state_machine.next_line(found_at - start)
            block = lines[start:found_at+1]
        else:
            extra = ''
            self.state_machine.next_line(i - start - 1)
            block = lines[start:]
        messages = self.malformed_table(
            block, 'No bottom table border found%s.' % extra)
        # `not extra` is true only when no border was found at all.
        return [], messages, not extra
    self.state_machine.next_line(end - start)
    block = lines[start:end+1]
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    return block, [], end == limit or not lines[end+1].strip()

1783

def malformed_table(self, block, detail='', offset=0):
    """Build a "Malformed table." error for `block`.

    The padding characters for double-width text are removed from
    `block`, and its text is attached to the error as a literal block.
    `detail`, if given, is appended to the message on a new line.
    `offset` is added to the reported line number.

    Returns a one-element list containing the error node.
    """
    block.replace(self.double_width_pad_char, '')
    table_text = '\n'.join(block)
    first_line = self.state_machine.abs_line_number() - len(block) + 1
    parts = ['Malformed table.']
    if detail:
        parts.append(detail)
    literal = nodes.literal_block(table_text, table_text)
    return [self.reporter.error('\n'.join(parts), literal,
                                line=first_line + offset)]

1794

def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
    """Create a table node from parsed table data.

    `tabledata` is a 3-tuple of column widths, header rows and body rows.
    `tableline` is passed on to `build_table_row()` for every row.
    The first `stub_columns` column specs get a "stub" attribute.
    `widths` selects the "colwidths-auto" class (value ``'auto'``) or the
    "colwidths-given" class (any other true value).
    """
    colwidths, headrows, bodyrows = tabledata
    table = nodes.table()
    if widths == 'auto':
        table['classes'] += ['colwidths-auto']
    elif widths:  # "grid" or list of integers
        table['classes'] += ['colwidths-given']
    tgroup = nodes.tgroup(cols=len(colwidths))
    table += tgroup
    stubs_left = stub_columns
    for width in colwidths:
        colspec = nodes.colspec(colwidth=width)
        if stubs_left:
            colspec.attributes['stub'] = 1
            stubs_left -= 1
        tgroup += colspec
    if headrows:
        thead = nodes.thead()
        tgroup += thead
        for header in headrows:
            thead += self.build_table_row(header, tableline)
    tbody = nodes.tbody()
    tgroup += tbody
    for body_row in bodyrows:
        tbody += self.build_table_row(body_row, tableline)
    return table

1820

def build_table_row(self, rowdata, tableline):
    """Create a row node from a sequence of parsed cells.

    Each cell is either None (skipped) or a 4-tuple
    ``(morerows, morecols, offset, cellblock)``.  Non-empty cell blocks
    are parsed into the entry node with `self.nested_parse()`.
    """
    row = nodes.row()
    for cell in rowdata:
        if cell is not None:
            morerows, morecols, offset, cellblock = cell
            span = {}
            if morerows:
                span['morerows'] = morerows
            if morecols:
                span['morecols'] = morecols
            entry = nodes.entry(**span)
            row += entry
            if ''.join(cellblock):
                self.nested_parse(cellblock,
                                  input_offset=tableline+offset,
                                  node=entry)
    return row

1838

explicit = Struct()
"""Patterns and constants used for explicit markup recognition."""

# Compiled (verbose) regular expressions, used by `hyperlink_target()`
# (`target`), `is_reference()` (`reference`) and `substitution_def()`
# (`substitution`).  Pattern fragments such as
# ``non_whitespace_escape_before`` and ``simplename`` are interpolated
# from the attributes of `Inliner`.
explicit.patterns = Struct(
      target=re.compile(r"""
                        (
                          _               # anonymous target
                        |               # *OR*
                          (?!_)           # no underscore at the beginning
                          (?P<quote>`?)   # optional open quote
                          (?![ `])        # first char. not space or
                                          # backquote
                          (?P<name>       # reference name
                            .+?
                          )
                          %(non_whitespace_escape_before)s
                          (?P=quote)      # close quote if open quote used
                        )
                        (?<!(?<!\x00):) # no unescaped colon at end
                        %(non_whitespace_escape_before)s
                        [ ]?            # optional space
                        :               # end of reference name
                        ([ ]+|$)        # followed by whitespace
                        """ % vars(Inliner), re.VERBOSE),
      reference=re.compile(r"""
                           (
                             (?P<simple>%(simplename)s)_
                           |                  # *OR*
                             `                  # open backquote
                             (?![ ])            # not space
                             (?P<phrase>.+?)    # hyperlink phrase
                             %(non_whitespace_escape_before)s
                             `_                 # close backquote,
                                                # reference mark
                           )
                           $                  # end of string
                           """ % vars(Inliner), re.VERBOSE),
      substitution=re.compile(r"""
                              (
                                (?![ ])          # first char. not space
                                (?P<name>.+?)    # substitution text
                                %(non_whitespace_escape_before)s
                                \|               # close delimiter
                              )
                              ([ ]+|$)           # followed by whitespace
                              """ % vars(Inliner),
                              re.VERBOSE),)

1886

def footnote(self, match):
    """Parse a footnote and return it.

    The label (first group of `match`) decides the footnote kind:
    a leading "#" means auto-numbered (optionally with a label),
    "*" means auto-symbol, anything else is a manually numbered footnote.

    Returns a 2-tuple: list with the footnote node, "blank finish" flag.
    """
    src, srcline = self.state_machine.get_source_and_line()
    indented, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(match.end()))
    label = match.group(1)
    name = normalize_name(label)
    node = nodes.footnote('\n'.join(indented))
    node.source = src
    node.line = srcline
    if name[0] == '#':
        # auto-numbered; the rest (if any) is the autonumber label
        name = name[1:]
        node['auto'] = 1
        if name:
            node['names'].append(name)
        self.document.note_autofootnote(node)
    elif name == '*':
        # auto-symbol
        name = ''
        node['auto'] = '*'
        self.document.note_symbol_footnote(node)
    else:
        # manually numbered
        node += nodes.label('', label)
        node['names'].append(name)
        self.document.note_footnote(node)
    if name:
        self.document.note_explicit_target(node, node)
    else:
        self.document.set_id(node, node)
    if indented:
        self.nested_parse(indented, input_offset=offset, node=node)
    return [node], blank_finish

1917

def citation(self, match):
    """Parse a citation and return it.

    The label is the first group of `match`; the citation is registered
    with the document as citation and as explicit target.

    Returns a 2-tuple: list with the citation node, "blank finish" flag.
    """
    src, srcline = self.state_machine.get_source_and_line()
    indented, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(match.end()))
    label = match.group(1)
    name = normalize_name(label)
    node = nodes.citation('\n'.join(indented))
    node.source = src
    node.line = srcline
    node += nodes.label('', label)
    node['names'].append(name)
    self.document.note_citation(node)
    self.document.note_explicit_target(node, node)
    if indented:
        self.nested_parse(indented, input_offset=offset, node=node)
    return [node], blank_finish

1934

def hyperlink_target(self, match):
    """Parse an explicit hyperlink target.

    Returns a 2-tuple: list with the result of `make_target()`, and
    the "blank finish" flag.

    Raises `MarkupError` if the target pattern does not match the
    block, even after joining all of its lines.
    """
    pattern = self.explicit.patterns.target
    lineno = self.state_machine.abs_line_number()
    (block, indent, offset, blank_finish
     ) = self.state_machine.get_first_known_indented(
         match.end(), until_blank=True, strip_indent=False)
    blocktext = match.string[:match.end()] + '\n'.join(block)
    block = [escape2null(line) for line in block]
    escaped = block[0]
    blockindex = 0
    # The target name may span several lines: keep appending lines
    # until the pattern matches or the block is exhausted.
    while True:
        targetmatch = pattern.match(escaped)
        if targetmatch:
            break
        blockindex += 1
        try:
            escaped += block[blockindex]
        except IndexError:
            raise MarkupError('malformed hyperlink target.')
    # Drop the lines before the one in which the match ended ...
    del block[:blockindex]
    # ... and keep only the text after the match on that line.
    block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
    target = self.make_target(block, blocktext, lineno,
                              targetmatch.group('name'))
    return [target], blank_finish

1959

def make_target(self, block, block_text, lineno, target_name):
    """Create a target node from a target's text block.

    The block is classified with `parse_target()`.  For the types
    'refname' and 'refuri' a target node is created and registered via
    `add_target()`; for any other type the data returned by
    `parse_target()` is returned unchanged.
    """
    target_type, data = self.parse_target(block, block_text, lineno)
    if target_type == 'refuri':
        uri_target = nodes.target(block_text, '')
        self.add_target(target_name, data, uri_target, lineno)
        return uri_target
    if target_type != 'refname':
        return data
    indirect = nodes.target(block_text, '', refname=normalize_name(data))
    indirect.indirect_reference_name = data
    self.add_target(target_name, '', indirect, lineno)
    self.document.note_indirect_target(indirect)
    return indirect

1974

def parse_target(self, block, block_text, lineno):
    """
    Determine the type of reference of a target.

    :Return: A 2-tuple, one of:

        - 'refname' and the indirect reference name
        - 'refuri' and the URI

    This implementation produces only these two kinds; `block_text` and
    `lineno` are not used here.
    """
    looks_indirect = bool(block) and block[-1].strip().endswith('_')
    if looks_indirect:  # possible indirect target
        joined = ' '.join(line.strip() for line in block)
        refname = self.is_reference(joined)
        if refname:
            return 'refname', refname
    # Otherwise a URI: whitespace inside each part is removed, parts
    # (separated by escaped whitespace) are joined with single spaces.
    pieces = split_escaped_whitespace(' '.join(block))
    uri = ' '.join(''.join(unescape(piece).split()) for piece in pieces)
    return 'refuri', uri

1994

def is_reference(self, reference):
    """Return the unescaped reference name, or None.

    `reference` is whitespace-normalized and matched against the
    "reference" pattern; the result is taken from the group "simple"
    or, if that is empty, from the group "phrase".
    """
    normalized = whitespace_normalize_name(reference)
    found = self.explicit.patterns.reference.match(normalized)
    if found:
        return unescape(found.group('simple') or found.group('phrase'))
    return None

2001

def add_target(self, targetname, refuri, target, lineno):
    """Complete `target` and register it with the document.

    With a `targetname`, the normalized name is added to the target and
    the target is noted as explicit target; a `refuri` is passed through
    `self.inliner.adjust_uri()` first.  Without a name the target is
    marked and noted as anonymous.

    Raises `ApplicationError` if `adjust_uri()` returns a false value
    for a named target's `refuri`.
    """
    target.line = lineno
    if not targetname:
        # anonymous target
        if refuri:
            target['refuri'] = refuri
        target['anonymous'] = 1
        self.document.note_anonymous_target(target)
        return
    target['names'].append(normalize_name(unescape(targetname)))
    if refuri:
        adjusted = self.inliner.adjust_uri(refuri)
        if not adjusted:
            raise ApplicationError('problem with URI: %r' % refuri)
        target['refuri'] = adjusted
    self.document.note_explicit_target(target, self.parent)

2019

def substitution_def(self, match):
    """Parse a substitution definition.

    Returns a 2-tuple: list with either the substitution definition
    node or a system message, and the "blank finish" flag.

    Raises `MarkupError` if the substitution pattern does not match
    the block, even after joining all of its lines.
    """
    pattern = self.explicit.patterns.substitution
    src, srcline = self.state_machine.get_source_and_line()
    (block, indent, offset, blank_finish
     ) = self.state_machine.get_first_known_indented(match.end(),
                                                     strip_indent=False)
    blocktext = (match.string[:match.end()] + '\n'.join(block))
    block.disconnect()
    escaped = escape2null(block[0].rstrip())
    blockindex = 0
    # The substitution name may span several lines: keep appending
    # (stripped) lines until the pattern matches.
    while True:
        subdefmatch = pattern.match(escaped)
        if subdefmatch:
            break
        blockindex += 1
        try:
            escaped = escaped + ' ' + escape2null(
                                          block[blockindex].strip())
        except IndexError:
            raise MarkupError('malformed substitution definition.')
    del block[:blockindex]  # strip out the substitution marker
    # Negative index, counted from the end of the padded line below.
    start = subdefmatch.end()-len(escaped)-1
    block[0] = (block[0].strip() + ' ')[start:-1]
    if not block[0]:
        del block[0]
        offset += 1
    # Remove trailing blank lines.
    while block and not block[-1].strip():
        block.pop()
    subname = subdefmatch.group('name')
    substitution_node = nodes.substitution_definition(blocktext)
    substitution_node.source = src
    substitution_node.line = srcline
    if not block:
        msg = self.reporter.warning(
            'Substitution definition "%s" missing contents.' % subname,
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline)
        return [msg], blank_finish
    block[0] = block[0].strip()
    substitution_node['names'].append(
        nodes.whitespace_normalize_name(subname))
    new_abs_offset, blank_finish = self.nested_list_parse(
          block, input_offset=offset, node=substitution_node,
          initial_state='SubstitutionDef', blank_finish=blank_finish)
    # Move children that are neither Inline nor Text nodes out of the
    # definition and into the parent.  `i` is the index in the (shrinking)
    # node, while the loop iterates over a copy of the child list.
    i = 0
    for node in substitution_node[:]:
        if not (isinstance(node, nodes.Inline)
                or isinstance(node, nodes.Text)):
            self.parent += substitution_node[i]
            del substitution_node[i]
        else:
            i += 1
    for node in substitution_node.findall(nodes.Element):
        if self.disallowed_inside_substitution_definitions(node):
            pformat = nodes.literal_block('', node.pformat().rstrip())
            msg = self.reporter.error(
                'Substitution definition contains illegal element <%s>:'
                % node.tagname,
                pformat, nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            return [msg], blank_finish
    if len(substitution_node) == 0:
        msg = self.reporter.warning(
              'Substitution definition "%s" empty or invalid.' % subname,
              nodes.literal_block(blocktext, blocktext),
              source=src, line=srcline)
        return [msg], blank_finish
    self.document.note_substitution_def(
        substitution_node, subname, self.parent)
    return [substitution_node], blank_finish

2090

def disallowed_inside_substitution_definitions(self, node):
    """Return True if `node` must not occur in a substitution definition.

    This is the case for nodes with "ids", for anonymous references, and
    for auto-numbered/auto-symbol footnote references.
    """
    return bool(
        node['ids']
        or isinstance(node, nodes.reference) and node.get('anonymous')
        or isinstance(node, nodes.footnote_reference) and node.get('auto'))

2098

def directive(self, match, **option_presets):
    """Look up and run a directive.

    The directive name is the first group of `match`.  Messages from the
    lookup are appended to `self.parent`.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    type_name = match.group(1)
    directive_class, messages = directives.directive(
        type_name, self.memo.language, self.document)
    self.parent += messages
    if not directive_class:
        return self.unknown_directive(type_name)
    return self.run_directive(
        directive_class, match, type_name, option_presets)

2110

def run_directive(self, directive, match, type_name, option_presets):
    """
    Parse a directive block, then instantiate and run the directive.

    Parameters:

    - `directive`: The class implementing the directive.  Plain
      functions and methods are converted with
      `convert_directive_function()` first.

    - `match`: A regular expression match object which matched the first
      line of the directive.

    - `type_name`: The directive name, as used in the source text.

    - `option_presets`: A dictionary of preset options, defaults for the
      directive options.

    A `MarkupError` from parsing the directive block and a
    `DirectiveError` from running the directive are turned into
    system messages.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    if isinstance(directive, (FunctionType, MethodType)):
        from docutils.parsers.rst import convert_directive_function
        directive = convert_directive_function(directive)
    lineno = self.state_machine.abs_line_number()
    first_offset = self.state_machine.line_offset
    indented, indent, line_offset, blank_finish = (
        self.state_machine.get_first_known_indented(match.end(),
                                                    strip_top=0))
    # Source text of the complete directive block (used in messages).
    last_offset = self.state_machine.line_offset
    block_text = '\n'.join(
        self.state_machine.input_lines[first_offset:last_offset + 1])
    try:
        parsed = self.parse_directive_block(indented, line_offset,
                                            directive, option_presets)
        arguments, options, content, content_offset = parsed
    except MarkupError as detail:
        text = 'Error in "%s" directive:\n%s.' % (type_name,
                                                  ' '.join(detail.args))
        error = self.reporter.error(
            text, nodes.literal_block(block_text, block_text), line=lineno)
        return [error], blank_finish
    directive_instance = directive(
        type_name, arguments, options, content, lineno,
        content_offset, block_text, self, self.state_machine)
    try:
        result = directive_instance.run()
    except docutils.parsers.rst.DirectiveError as error:
        msg_node = self.reporter.system_message(error.level, error.msg,
                                                line=lineno)
        msg_node += nodes.literal_block(block_text, block_text)
        result = [msg_node]
    assert isinstance(result, list), \
        'Directive "%s" must return a list of nodes.' % type_name
    for index, item in enumerate(result):
        assert isinstance(item, nodes.Node), \
            ('Directive "%s" returned non-Node object (index %s): %r'
             % (type_name, index, item))
    return (result,
            blank_finish or self.state_machine.is_next_line_blank())

2170

def parse_directive_block(self, indented, line_offset, directive,
                          option_presets):
    """Split a directive block into arguments, options and content.

    Returns a 4-tuple ``(arguments, options, content, content_offset)``.

    Raises `MarkupError` if there is content although the directive
    does not permit any (errors raised by the called option/argument
    parsers are not caught here).
    """
    option_spec = directive.option_spec
    has_content = directive.has_content
    # Skip one leading blank line and all trailing blank lines.
    if indented and not indented[0].strip():
        indented.trim_start()
        line_offset += 1
    while indented and not indented[-1].strip():
        indented.trim_end()
    if indented and (directive.required_arguments
                     or directive.optional_arguments
                     or option_spec):
        # The argument/option block extends up to the first blank line
        # (or to the end of the block, if there is no blank line).
        for i, line in enumerate(indented):
            if not line.strip():
                break
        else:
            i += 1
        arg_block = indented[:i]
        content = indented[i+1:]
        content_offset = line_offset + i + 1
    else:
        content = indented
        content_offset = line_offset
        arg_block = []
    if option_spec:
        options, arg_block = self.parse_directive_options(
            option_presets, option_spec, arg_block)
    else:
        options = {}
    if arg_block and not (directive.required_arguments
                          or directive.optional_arguments):
        # The directive takes no arguments: what remains of the
        # argument block is content.  (`i` is always set here, because
        # a non-empty `arg_block` only comes from the first branch above.)
        content = arg_block + indented[i:]
        content_offset = line_offset
        arg_block = []
    # Skip leading blank lines of the content.
    while content and not content[0].strip():
        content.trim_start()
        content_offset += 1
    if directive.required_arguments or directive.optional_arguments:
        arguments = self.parse_directive_arguments(
            directive, arg_block)
    else:
        arguments = []
    if content and not has_content:
        raise MarkupError('no content permitted')
    return arguments, options, content, content_offset

2216

def parse_directive_options(self, option_presets, option_spec, arg_block):
    """Separate and parse the option lines of a directive's argument block.

    The options start at the first line of `arg_block` that matches the
    "field_marker" pattern of `Body`.  Parsed options update a copy of
    `option_presets`.

    Returns a 2-tuple: the options dictionary and the remaining
    argument block.  Raises `MarkupError` if the options cannot be parsed.
    """
    options = option_presets.copy()
    opt_block = []
    for index, line in enumerate(arg_block):
        if re.match(Body.patterns['field_marker'], line):
            opt_block = arg_block[index:]
            arg_block = arg_block[:index]
            break
    if not opt_block:
        return options, arg_block
    success, data = self.parse_extension_options(option_spec, opt_block)
    if not success:  # data is an error string
        raise MarkupError(data)
    options.update(data)  # data is a dict of options
    return options, arg_block

2234

def parse_directive_arguments(self, directive, arg_block):
    """Split the argument block into a list of directive arguments.

    The number of arguments is checked against the directive's
    `required_arguments` and `optional_arguments`.  If there are too many
    and the directive sets `final_argument_whitespace`, the surplus text
    (including whitespace) becomes part of the last argument.

    Raises `MarkupError` if too few or too many arguments are supplied.
    """
    required = directive.required_arguments
    maximum = required + directive.optional_arguments
    arg_text = '\n'.join(arg_block)
    arguments = arg_text.split()
    supplied = len(arguments)
    if supplied < required:
        raise MarkupError('%s argument(s) required, %s supplied'
                          % (required, supplied))
    if supplied <= maximum:
        return arguments
    if not directive.final_argument_whitespace:
        raise MarkupError(
            'maximum %s argument(s) allowed, %s supplied'
            % (maximum, supplied))
    return arg_text.split(None, maximum - 1)

2251

def parse_extension_options(self, option_spec, datalines):
    """
    Parse `datalines` as a field list of extension options and convert
    the fields according to `option_spec`.

    :Parameters:
        - `option_spec`: a mapping of option name to conversion
          function, which should raise an exception on bad input.
        - `datalines`: a list of input strings.

    :Return:
        - Success value, 1 or 0.
        - An option dictionary on success, an error string on failure.
    """
    field_list = nodes.field_list()
    end_offset, blank_finish = self.nested_list_parse(
        datalines, 0, field_list, initial_state='ExtensionOptions',
        blank_finish=True)
    if end_offset != len(datalines):  # incomplete parse of block
        return 0, 'invalid option block'
    try:
        options = utils.extract_extension_options(field_list, option_spec)
    except KeyError as detail:
        return 0, 'unknown option: "%s"' % detail.args[0]
    except (ValueError, TypeError) as detail:
        return 0, 'invalid option value: %s' % ' '.join(detail.args)
    except utils.ExtensionOptionError as detail:
        return 0, 'invalid option data: %s' % ' '.join(detail.args)
    if not blank_finish:
        return 0, 'option data incompletely parsed'
    return 1, options

2284

def unknown_directive(self, type_name):
    """Report an unknown directive.

    The directive's block is consumed and attached to the error message
    as a literal block.

    Returns a 2-tuple: list with the error node, "blank finish" flag.
    """
    lineno = self.state_machine.abs_line_number()
    indented, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(0, strip_indent=False))
    block_text = '\n'.join(indented)
    literal = nodes.literal_block(block_text, block_text)
    message = 'Unknown directive type "%s".' % type_name
    return [self.reporter.error(message, literal, line=lineno)], blank_finish

2294

def comment(self, match):
    """Parse a comment.

    Two one-line cases are handled specially when the next line is blank:
    an empty comment, and the 'end of inclusion from "' marker, which
    pops the document's include log and produces no node.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    if self.state_machine.is_next_line_blank():
        remainder = match.string[match.end():]
        if not remainder.strip():  # empty comment
            return [nodes.comment()], True  # "A tiny but practical wart."
        if remainder.startswith('end of inclusion from "'):
            # cf. parsers.rst.directives.misc.Include
            self.document.include_log.pop()
            return [], True
    indented, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(match.end()))
    # Drop trailing blank lines.
    while indented and not indented[-1].strip():
        indented.trim_end()
    comment_text = '\n'.join(indented)
    return [nodes.comment(comment_text, comment_text)], blank_finish

2310

# List of (method, compiled pattern) pairs.  `explicit_construct()` tries
# the patterns in this order and calls the method belonging to the first
# pattern that matches the line.
explicit.constructs = [
      (footnote,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[
                  (                 # footnote label:
                      [0-9]+          # manually numbered footnote
                    |               # *OR*
                      \#              # anonymous auto-numbered footnote
                    |               # *OR*
                      \#%s            # auto-number ed?) footnote label
                    |               # *OR*
                      \*              # auto-symbol footnote
                  )
                  \]
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE)),
      (citation,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[(%s)\]          # citation label
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE)),
      (hyperlink_target,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  _                 # target indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE)),
      (substitution_def,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \|                # substitution indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE)),
      (directive,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  (%s)              # directive name
                  [ ]?              # optional space
                  ::                # directive delimiter
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE))]

2354

def explicit_markup(self, match, context, next_state):
    """Transition method for explicit markup.

    Handles footnotes, hyperlink targets, directives and comments: the
    construct is parsed and added to `self.parent`, then any directly
    following explicit constructs are parsed via `explicit_list()`.
    """
    parsed_nodes, ends_blank = self.explicit_construct(match)
    self.parent += parsed_nodes
    self.explicit_list(ends_blank)
    return [], next_state, []

2361

def explicit_construct(self, match):
    """Determine which explicit construct this is, parse & return it.

    The first matching pattern of `self.explicit.constructs` selects the
    parsing method.  If that method raises `MarkupError`, or if no
    pattern matches, the text is parsed as a comment; in the former case
    a warning is added to the returned nodes.
    """
    errors = []
    for method, pattern in self.explicit.constructs:
        expmatch = pattern.match(match.string)
        if not expmatch:
            continue
        try:
            return method(self, expmatch)
        except MarkupError as error:
            lineno = self.state_machine.abs_line_number()
            text = ' '.join(error.args)
            errors.append(self.reporter.warning(text, line=lineno))
            break
    nodelist, blank_finish = self.comment(match)
    return nodelist + errors, blank_finish

2377

def explicit_list(self, blank_finish):
    """
    Parse a series of explicit markup constructs (including anonymous
    hyperlink targets) with a nested state machine.

    Parsing starts at the line after the current one, in the initial
    state "Explicit".  A warning is added to `self.parent` if the series
    does not end with a blank line.
    """
    next_offset = self.state_machine.line_offset + 1  # next line
    remaining = self.state_machine.input_lines[next_offset:]
    abs_offset = self.state_machine.abs_line_offset() + 1
    newline_offset, blank_finish = self.nested_list_parse(
        remaining,
        input_offset=abs_offset,
        node=self.parent, initial_state='Explicit',
        blank_finish=blank_finish,
        match_titles=self.state_machine.match_titles)
    self.goto_line(newline_offset)
    if blank_finish:
        return
    self.parent += self.unindent_warning('Explicit markup')

2393

def anonymous(self, match, context, next_state):
    """Transition method for anonymous hyperlink targets.

    The target is parsed and added to `self.parent`, then any directly
    following explicit constructs are parsed via `explicit_list()`.
    """
    parsed_nodes, ends_blank = self.anonymous_target(match)
    self.parent += parsed_nodes
    self.explicit_list(ends_blank)
    return [], next_state, []

2400

def anonymous_target(self, match):
    """Parse an anonymous hyperlink target.

    Returns a 2-tuple: list with the result of `make_target()` (called
    with an empty target name), and the "blank finish" flag.
    """
    lineno = self.state_machine.abs_line_number()
    raw_block, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(match.end(),
                                                    until_blank=True))
    blocktext = match.string[:match.end()] + '\n'.join(raw_block)
    escaped_block = [escape2null(line) for line in raw_block]
    return ([self.make_target(escaped_block, blocktext, lineno, '')],
            blank_finish)

2410

def line(self, match, context, next_state):
    """Transition method: section title overline or transition marker.

    If titles may be matched, the line is handed to the "Line" state.
    Otherwise "::" and lines shorter than four characters are re-parsed
    as text (`TransitionCorrection`), and anything else is reported as
    a severe error.
    """
    if self.state_machine.match_titles:
        return [match.string], 'Line', []
    stripped = match.string.strip()
    if stripped == '::':
        raise statemachine.TransitionCorrection('text')
    if len(stripped) < 4:
        self.parent += self.reporter.info(
            'Unexpected possible title overline or transition.\n'
            "Treating it as ordinary text because it's so short.",
            line=self.state_machine.abs_line_number())
        raise statemachine.TransitionCorrection('text')
    blocktext = self.state_machine.line
    self.parent += self.reporter.severe(
        'Unexpected section title or transition.',
        nodes.literal_block(blocktext, blocktext),
        line=self.state_machine.abs_line_number())
    return [], next_state, []

2432

def text(self, match, context, next_state):
    """Transition method for text (titles, definition lists, paragraphs).

    The matched line becomes the new context and the state machine
    switches to the "Text" state.
    """
    new_context = [match.string]
    return new_context, 'Text', []

2436

2437

class RFC2822Body(Body):

    """
    Body state that additionally recognizes RFC2822-style headers.

    RFC2822 headers are only valid as the first constructs in documents.
    As soon as anything else appears, the `Body` state should take over.
    """

    patterns = Body.patterns.copy()  # can't modify the original
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    initial_transitions.insert(-1, ('rfc2822', 'Body'))  # just before 'text'

    def rfc2822(self, match, context, next_state):
        """Transition method: first RFC2822-style field of a field list.

        Creates the field list, parses the first field, and parses the
        following fields with a nested state machine starting in the
        "RFC2822List" state.
        """
        fieldlist = nodes.field_list(classes=['rfc2822'])
        self.parent += fieldlist
        first_field, blank_finish = self.rfc2822_field(match)
        fieldlist += first_field
        next_offset = self.state_machine.line_offset + 1  # next line
        remaining = self.state_machine.input_lines[next_offset:]
        abs_offset = self.state_machine.abs_line_offset() + 1
        newline_offset, blank_finish = self.nested_list_parse(
            remaining,
            input_offset=abs_offset,
            node=fieldlist, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """Parse one RFC2822-style field.

        The field name is the text before the first colon; the field
        body is parsed with `nested_parse()` if it is not empty.

        Returns a 2-tuple: the field node and the "blank finish" flag.
        """
        colon_at = match.string.find(':')
        name = match.string[:colon_at]
        indented, indent, line_offset, blank_finish = (
            self.state_machine.get_first_known_indented(match.end(),
                                                        until_blank=True))
        fieldnode = nodes.field()
        fieldnode += nodes.field_name(name, name)
        fieldbody = nodes.field_body('\n'.join(indented))
        fieldnode += fieldbody
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=fieldbody)
        return fieldnode, blank_finish

2482

2483

class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.
    Compound elements are lists and list-like constructs.

    All transition methods are disabled (redefined as `invalid_input`).
    Override individual methods in subclasses to re-enable.

    For example, once an initial bullet list item, say, is recognized, the
    `BulletList` subclass takes over, with a "bullet_list" node as its
    container.  Upon encountering the initial bullet list item,
    `Body.bullet` calls its ``self.nested_list_parse``
    (`RSTState.nested_list_parse`), which starts up a nested parsing
    session with `BulletList` as the initial state.  Only the ``bullet``
    transition method is enabled in `BulletList`; as long as only bullet
    list items are encountered, they are parsed and inserted into the
    container.  The first construct which is *not* a bullet list item
    triggers the `invalid_input` method, which ends the nested parse and
    closes the container.  `BulletList` needs to recognize input that is
    invalid in the context of a bullet list, which means everything
    *other than* bullet list items, so it inherits the transition list
    created in `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        # back up so parent SM can reassess
        self.state_machine.previous_line()
        raise EOFError

    # Disable every transition method.
    indent = bullet = enumerator = field_marker = option_marker = \
        doctest = line_block = grid_table_top = simple_table_top = \
        explicit_markup = anonymous = line = text = invalid_input

2526

2527

class BulletList(SpecializedBody):

    """State for the second and subsequent list_items of a bullet_list."""

    def bullet(self, match, context, next_state):
        """Transition method: add another item to the bullet list.

        An item with a bullet character different from the list's
        starts a new list (via `invalid_input()`).
        """
        bullet_char = match.string[0]
        if bullet_char != self.parent['bullet']:
            # different bullet: new list
            self.invalid_input()
        item, ends_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = ends_blank
        return [], next_state, []

2541

2542

class DefinitionList(SpecializedBody):

    """State for the second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """Transition method: hand a text line to the "Definition" state."""
        new_context = [match.string]
        return new_context, 'Definition', []

2550

2551

class EnumeratedList(SpecializedBody):

    """State for the second and subsequent items of an enumerated_list."""

    def enumerator(self, match, context, next_state):
        """Transition method: add another item to the enumerated list.

        An enumerator that does not continue the current list (different
        format or sequence, wrong ordinal, mixing of auto-enumerators
        with explicit ones) starts a new list via `invalid_input()`.
        """
        format, sequence, text, ordinal = self.parse_enumerator(
            match, self.parent['enumtype'])
        starts_new_list = (
            format != self.format
            or (sequence != '#'
                and (sequence != self.parent['enumtype']
                     or self.auto
                     or ordinal != (self.lastordinal + 1)))
            or not self.is_enumerated_list_item(ordinal, sequence, format))
        if starts_new_list:
            # different enumeration: new list
            self.invalid_input()
        if sequence == '#':
            self.auto = 1
        item, ends_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = ends_blank
        self.lastordinal = ordinal
        return [], next_state, []

2574

2575

class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """
        Add one more field to the field list being built.

        The node returned by `self.field()` is appended to the parent and
        the accompanying "blank finish" flag is stored on this state.
        """
        new_field, finished_blank = self.field(match)
        self.parent += new_field
        self.blank_finish = finished_blank
        return [], next_state, []

2586

2587

class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """
        Add one more item to the option list being built.

        If `self.option_list_item()` raises `MarkupError`, the line is not
        part of this list and `invalid_input()` aborts this state machine.
        """
        try:
            parsed = self.option_list_item(match)
        except MarkupError:
            self.invalid_input()
        item, finished_blank = parsed
        self.parent += item
        self.blank_finish = finished_blank
        return [], next_state, []

2601

2602

class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    # Reuse the patterns and transitions of `RFC2822Body` unchanged.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """
        Add one more RFC2822-style field to the field list being built.

        The next state is always 'RFC2822List'; the `next_state` argument
        is not used.
        """
        new_field, finished_blank = self.rfc2822_field(match)
        self.parent += new_field
        self.blank_finish = finished_blank
        return [], 'RFC2822List', []

    # A blank line is handled like any other invalid input.
    blank = SpecializedBody.invalid_input

2618

2619

class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node):
        """
        Override `Body.parse_field_body` for simpler parsing.

        Every run of consecutive non-blank lines in `indented` is joined
        with newlines and appended to `node` as one `nodes.paragraph`.
        The `offset` argument is not used.
        """
        pending = []
        # The trailing empty string acts as a sentinel that flushes the
        # last run of lines.
        for line in [*indented, '']:
            if line.strip():
                pending.append(line)
                continue
            if not pending:
                continue
            text = '\n'.join(pending)
            node += nodes.paragraph(text, text)
            pending = []

2638

2639

class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    # A blank line is handled like any other invalid input.
    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """
        Add one more line to the line block being built.

        The line node goes to the parent (the line block); the messages
        returned with it go to the parent's parent.
        """
        current_lineno = self.state_machine.abs_line_number()
        new_line, messages, finished_blank = self.line_block_line(
            match, current_lineno)
        self.parent += new_line
        self.parent.parent += messages
        self.blank_finish = finished_blank
        return [], next_state, []

2654

2655

class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    def explicit_markup(self, match, context, next_state):
        """
        Footnotes, hyperlink targets, directives, comments.

        Append the nodes returned by `self.explicit_construct()` to the
        parent and store the "blank finish" flag.
        """
        new_nodes, finished_blank = self.explicit_construct(match)
        self.parent += new_nodes
        self.blank_finish = finished_blank
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """
        Anonymous hyperlink targets.

        Append the nodes returned by `self.anonymous_target()` to the
        parent and store the "blank finish" flag.
        """
        new_nodes, finished_blank = self.anonymous_target(match)
        self.parent += new_nodes
        self.blank_finish = finished_blank
        return [], next_state, []

    # A blank line is handled like any other invalid input.
    blank = SpecializedBody.invalid_input

2675

2676

class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
        'embedded_directive': re.compile(
            r'(%s)::( +|$)' % Inliner.simplename),
        'text': r'',
    }
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """
        Parse a directive embedded in the substitution definition.

        The first of the parent's ``names`` is passed to `self.directive()`
        as `alt`.  The resulting nodes are appended to the parent.  The
        "blank finish" flag is stored only if the state machine is not at
        the end of its input.

        Always raises `EOFError`; nothing is returned.
        """
        substitution_name = self.parent['names'][0]
        new_nodes, finished_blank = self.directive(
            match, alt=substitution_name)
        self.parent += new_nodes
        if not self.state_machine.at_eof():
            self.blank_finish = finished_blank
        raise EOFError

    def text(self, match, context, next_state):
        """
        Handle a line that is not an embedded directive.

        Unless the state machine is at the end of its input, store whether
        the next line is blank as the "blank finish" flag.

        Always raises `EOFError`; nothing is returned.
        """
        machine = self.state_machine
        if not machine.at_eof():
            self.blank_finish = machine.is_next_line_blank()
        raise EOFError

2701

2702

class Text(RSTState):

    """
    Classifier of second line of a text block.

    Could be a paragraph, a definition list item, or a title.
    """

    patterns = {
        'underline': Body.patterns['line'],
        'text': r'',
    }
    initial_transitions = [('underline', 'Body'), ('text', 'Body')]

    def blank(self, match, context, next_state):
        """
        End of paragraph.

        Turn the collected `context` lines into a paragraph.  If the
        paragraph announces a literal block, parse and append that too.
        """
        # NOTE: self.paragraph returns [node, system_message(s)], literalnext
        first_lineno = self.state_machine.abs_line_number() - 1
        paragraph, literalnext = self.paragraph(context, first_lineno)
        self.parent += paragraph
        if literalnext:
            self.parent += self.literal_block()
        return [], 'Body', []

    def eof(self, context):
        """Flush pending `context` lines as a paragraph at end of input."""
        if context:
            self.blank(None, context, None)
        return []

    def indent(self, match, context, next_state):
        """
        Definition list item.

        Create a definition list from the term in `context` and the
        indented block at the current line, then parse any further items
        with a nested state machine starting in 'DefinitionList'.
        """
        dl = nodes.definition_list()
        # the definition list starts on the line before the indent:
        term_lineno = self.state_machine.abs_line_number() - 1
        dl.source, dl.line = self.state_machine.get_source_and_line(
            term_lineno)
        first_item, blank_finish = self.definition_list_item(context)
        dl += first_item
        self.parent += dl
        # Continue on the line following the first item.
        next_offset = self.state_machine.line_offset + 1
        remaining = self.state_machine.input_lines[next_offset:]
        newline_offset, blank_finish = self.nested_list_parse(
            remaining,
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=dl,
            initial_state='DefinitionList',
            blank_finish=blank_finish,
            blank_finish_state='Definition')
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Definition list')
        return [], 'Body', []

    def underline(self, match, context, next_state):
        """
        Section title.

        `context[0]` is the title line, the matched line is the underline.
        An underline shorter than the title is either reinterpreted as text
        (fewer than 4 characters; raises `TransitionCorrection`) or
        accepted with a warning.  If section titles are not allowed here
        (``match_titles`` is false), a severe error is reported instead of
        creating a section.
        """
        lineno = self.state_machine.abs_line_number()
        title = context[0].rstrip()
        underline = match.string.rstrip()
        source = title + '\n' + underline
        messages = []
        if column_width(title) > len(underline):
            if len(underline) < 4:
                if self.state_machine.match_titles:
                    info = self.reporter.info(
                        'Possible title underline, too short for the title.\n'
                        "Treating it as ordinary text because it's so short.",
                        line=lineno)
                    self.parent += info
                raise statemachine.TransitionCorrection('text')
            blocktext = context[0] + '\n' + self.state_machine.line
            warning = self.reporter.warning(
                'Title underline too short.',
                nodes.literal_block(blocktext, blocktext),
                line=lineno)
            messages.append(warning)
        if self.state_machine.match_titles:
            style = underline[0]
            context[:] = []
            self.section(title, source, style, lineno - 1, messages)
            return [], next_state, []
        blocktext = context[0] + '\n' + self.state_machine.line
        # We need get_source_and_line() here to report correctly
        src, srcline = self.state_machine.get_source_and_line()
        # TODO: why is abs_line_number() == srcline+1
        # if the error is in a table (try with test_tables.py)?
        severe = self.reporter.severe(
            'Unexpected section title.',
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline)
        self.parent += messages
        self.parent += severe
        return [], next_state, []

    def text(self, match, context, next_state):
        """
        Paragraph.

        Read the rest of the text block, combine it with `context` and add
        the resulting paragraph to the parent.  Unexpected indentation is
        reported as an error placed after the paragraph.
        """
        startline = self.state_machine.abs_line_number() - 1
        indent_error = None
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            block, src, srcline = err.args
            indent_error = self.reporter.error(
                'Unexpected indentation.', source=src, line=srcline)
        paragraph, literalnext = self.paragraph(
            context + list(block), startline)
        self.parent += paragraph
        self.parent += indent_error
        if literalnext:
            try:
                self.state_machine.next_line()
            except EOFError:
                pass
            self.parent += self.literal_block()
        return [], next_state, []

    def literal_block(self):
        """
        Return a list of nodes.

        Trailing blank lines of the indented block are dropped.  If nothing
        is left, fall back to `quoted_literal_block()`.  An "unindent"
        warning is added if the block does not finish with a blank line.
        """
        (indented, _indent, offset,
         blank_finish) = self.state_machine.get_indented()
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        block_node = nodes.literal_block(data, data)
        (block_node.source,
         block_node.line) = self.state_machine.get_source_and_line(offset+1)
        if blank_finish:
            return [block_node]
        return [block_node, self.unindent_warning('Literal block')]

    def quoted_literal_block(self):
        """
        Parse a quoted (unindented) literal block.

        A nested state machine restricted to `QuotedLiteralBlock` parses
        the input from the current line on into a throw-away element.
        Return the children of that element.
        """
        start_abs_offset = self.state_machine.abs_line_offset()
        start_offset = self.state_machine.line_offset
        container = nodes.Element()
        nested_settings = {'state_classes': (QuotedLiteralBlock,),
                           'initial_state': 'QuotedLiteralBlock'}
        new_abs_offset = self.nested_parse(
            self.state_machine.input_lines[start_offset:],
            input_offset=start_abs_offset,
            node=container,
            match_titles=False,
            state_machine_kwargs=nested_settings)
        self.goto_line(new_abs_offset)
        return container.children

    def definition_list_item(self, termline):
        """
        Return a definition_list_item node and the "blank finish" flag.

        `termline` is a list holding the term line; the indented block at
        the current line provides the definition.
        """
        # the parser is already on the second (indented) line:
        dd_lineno = self.state_machine.abs_line_number()
        dt_lineno = dd_lineno - 1
        (indented, _indent, line_offset,
         blank_finish) = self.state_machine.get_indented()
        rawsource = '\n'.join(termline + list(indented))
        dl_item = nodes.definition_list_item(rawsource)
        (dl_item.source,
         dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
        dt_nodes, messages = self.term(termline, dt_lineno)
        dl_item += dt_nodes
        dd = nodes.definition('', *messages)
        dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
        dl_item += dd
        if termline[0].endswith('::'):
            dd += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.',
                line=dd_lineno)
        # TODO: drop a definition if it is an empty comment to allow
        # definition list items with several terms?
        # https://sourceforge.net/p/docutils/feature-requests/60/
        self.nested_parse(indented, input_offset=line_offset, node=dd)
        return dl_item, blank_finish

    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """
        Return a definition_list's term and optional classifiers.

        The result is a pair ``(node_list, messages)``.  `node_list` starts
        with the term node.  Each `classifier_delimiter` found in a Text
        node starts a new classifier node, and later inline nodes are
        attached to the most recent node in the list.
        """
        assert len(lines) == 1
        text_nodes, messages = self.inline_text(lines[0], lineno)
        dt = nodes.term(lines[0])
        dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
        node_list = [dt]
        for node in text_nodes:
            if not isinstance(node, nodes.Text):
                node_list[-1] += node
                continue
            head, *classifiers = self.classifier_delimiter.split(node)
            if not classifiers:
                # no delimiter in this text node
                node_list[-1] += node
                continue
            node_list[-1] += nodes.Text(head.rstrip())
            for part in classifiers:
                node_list.append(
                    nodes.classifier(unescape(part, True), part))
        return node_list, messages

2895

2896

class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled. Override individual methods in
    subclasses to re-enable.
    """

    def eof(self, context):
        """Incomplete construct: nothing is added, return no results."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    # Disable every transition method inherited from `Text`.
    blank = indent = underline = text = invalid_input

2918

2919

class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """
        Not a definition.

        Step back two lines so the parent state machine can reassess.
        """
        self.state_machine.previous_line(2)
        return []

    def indent(self, match, context, next_state):
        """
        Definition list item.

        Build the item from the term in `context`, append it to the parent
        and continue in the 'DefinitionList' state.
        """
        new_item, finished_blank = self.definition_list_item(context)
        self.parent += new_item
        self.blank_finish = finished_blank
        return [], 'DefinitionList', []

2935

2936

class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.

    `context[0]` holds the first line (the overline or transition marker)
    in all transition methods of this state.
    """

    eofcheck = 1  # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """
        Transition marker at end of section or document.

        A marker shorter than 4 characters is re-parsed as ordinary text
        (`state_correction()` raises), unless the memo's
        ``section_bubble_up_kludge`` flag is set; in that case the flag is
        reset and the length check is skipped.
        """
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = False
        elif len(marker) < 4:
            self.state_correction(context)
        if self.eofcheck:  # ignore EOFError with sections
            src, srcline = self.state_machine.get_source_and_line()
            # lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.source = src
            transition.line = srcline - 1
            # transition.line = lineno
            self.parent += transition
        # `text()` sets eofcheck to 0 around its `self.section()` call;
        # it is restored to 1 here.
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """
        Transition marker.

        A marker shorter than 4 characters is re-parsed as ordinary text
        (`state_correction()` raises).  Otherwise a transition node is
        added to the parent and parsing continues in 'Body'.
        """
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        # the marker is on the line before the current (blank) one
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """
        Potential over- & underlined title.

        The matched line is the title; the following line is fetched as the
        underline.  In each failure case below, an overline shorter than 4
        characters is re-parsed as ordinary text via `short_overline()`
        (which raises).  Otherwise:

        - end of input after the title: severe "Incomplete section title.";
        - next line is no underline: severe "Missing matching underline
          for section title overline.";
        - overline and underline differ: severe "Title overline &
          underline mismatch.";
        - title wider than the overline: warning "Title overline too
          short.", and the section is created nevertheless.
        """
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        # `source` keeps the three lines as read, before stripping.
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno)
                messages.append(msg)
        # The section style is the pair of overline and underline character.
        style = (overline[0], underline[0])
        self.eofcheck = 0  # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text  # indented title

    def underline(self, match, context, next_state):
        """
        Handle a second line that looks like a line marker directly after
        the first one.

        A first line shorter than 4 characters is re-parsed as ordinary
        text via `short_overline()` (which raises).  Otherwise an error
        "Invalid section title or transition marker." is added to the
        parent.
        """
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        """
        Report a too-short overline and re-parse it as ordinary text.

        Adds an info message to the parent, then calls `state_correction()`
        which backs up `lines` lines and raises.  This method does not
        return normally.  `blocktext` is not used here.
        """
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """
        Back up `lines` lines, empty `context` in place, and raise
        `statemachine.StateCorrection('Body', 'text')`.
        """
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')

3065

3066

class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.
    """

    patterns = {
        'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
        'text': r'',
    }
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False):
        """Set up the state with no pending messages and no start line."""
        RSTState.__init__(self, state_machine, debug)
        # System messages collected while parsing; added to the parent
        # in `eof()`.
        self.messages = []
        # Line number of the first quoted line, set by `initial_quoted()`.
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        """
        Handle a blank line.

        With lines already collected in `context`, abort this state machine
        by raising `EOFError`.  Otherwise pass `context` on unchanged.
        """
        if not context:
            return context, next_state, []
        raise EOFError

    def eof(self, context):
        """
        Finish the block.

        Add a literal block built from the collected lines to the parent,
        or a warning (stepping back one line) if no lines were collected.
        The pending messages are appended afterwards.
        """
        if not context:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number()
                )  # src not available, statemachine.input_lines is empty
            self.state_machine.previous_line()
        else:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            block_node = nodes.literal_block(text, text)
            block_node.source = src
            block_node.line = srcline
            self.parent += block_node
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        """
        Handle an indented line inside the quoted block.

        Queue an "Unexpected indentation." error, step back one line and
        abort this state machine by raising `EOFError`.
        """
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        error = self.reporter.error(
            'Unexpected indentation.',
            line=self.state_machine.abs_line_number())
        self.messages.append(error)
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """
        Match arbitrary quote character on the first line only.

        This transition removes itself and installs a 'quoted' transition
        that matches the quote character found on this line.
        """
        self.remove_transition('initial_quoted')
        quote_char = match.string[0]
        quote_pattern = re.compile(re.escape(quote_char))
        # New transition matches consistent quotes only:
        own_state_name = self.__class__.__name__
        self.add_transition(
            'quoted', (quote_pattern, self.quoted, own_state_name))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        """
        Handle a line that does not start with the expected quote.

        If lines were already collected, queue an "Inconsistent literal
        block quoting." error and step back one line.  In either case,
        abort this state machine by raising `EOFError`.
        """
        if context:
            error = self.reporter.error(
                'Inconsistent literal block quoting.',
                line=self.state_machine.abs_line_number())
            self.messages.append(error)
            self.state_machine.previous_line()
        raise EOFError

3140

3141

state_classes = (
    Body,
    BulletList,
    DefinitionList,
    EnumeratedList,
    FieldList,
    OptionList,
    LineBlock,
    ExtensionOptions,
    Explicit,
    Text,
    Definition,
    Line,
    SubstitutionDef,
    RFC2822Body,
    RFC2822List,
)
"""Standard set of State classes used to start `RSTStateMachine`."""