Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/docutils/parsers/rst/states.py: 86%
1766 statements
coverage.py v7.2.7, created at 2023-06-07 06:06 +0000
1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
5"""
6This is the ``docutils.parsers.rst.states`` module, the core of
7the reStructuredText parser. It defines the following:
9:Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
31:Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
36:Functions:
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
40:Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
43Parser Overview
44===============
46The reStructuredText parser is implemented as a recursive state machine,
47examining its input one line at a time. To understand how the parser works,
48please first become familiar with the `docutils.statemachine` module. In the
49description below, references are made to classes defined in this module;
50please see the individual classes for details.
52Parsing proceeds as follows:
541. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
592. The method associated with the matched transition pattern is called.
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
101"""
103__docformat__ = 'reStructuredText'
106import re
107from types import FunctionType, MethodType
109from docutils import nodes, statemachine, utils
110from docutils import ApplicationError, DataError
111from docutils.statemachine import StateMachineWS, StateWS
112from docutils.nodes import fully_normalize_name as normalize_name
113from docutils.nodes import unescape, whitespace_normalize_name
114import docutils.parsers.rst
115from docutils.parsers.rst import directives, languages, tableparser, roles
116from docutils.utils import escape2null, column_width
117from docutils.utils import punctuation_chars, roman, urischemes
118from docutils.utils import split_escaped_whitespace
121class MarkupError(DataError): pass
122class UnknownInterpretedRoleError(DataError): pass
123class InterpretedRoleNotImplementedError(DataError): pass
124class ParserError(ApplicationError): pass
125class MarkupMismatch(Exception): pass
128class Struct:
130 """Stores data attributes for dotted-attribute access."""
132 def __init__(self, **keywordargs):
133 self.__dict__.update(keywordargs)
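# Illustrative sketch (not part of the listed module): `Struct` simply turns
# keyword arguments into attributes, which is how the parse-global `memo`
# object is built below.  `_example_memo` is a made-up name.
_example_memo = Struct(section_level=0, title_styles=[])
_example_memo.section_level += 1        # dotted-attribute access
assert _example_memo.title_styles == []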
136class RSTStateMachine(StateMachineWS):
138 """
139 reStructuredText's master StateMachine.
141 The entry point to reStructuredText parsing is the `run()` method.
142 """
144 def run(self, input_lines, document, input_offset=0, match_titles=True,
145 inliner=None):
146 """
147 Parse `input_lines` and modify the `document` node in place.
149 Extend `StateMachineWS.run()`: set up parse-global data and
150 run the StateMachine.
151 """
152 self.language = languages.get_language(
153 document.settings.language_code, document.reporter)
154 self.match_titles = match_titles
155 if inliner is None:
156 inliner = Inliner()
157 inliner.init_customizations(document.settings)
158 self.memo = Struct(document=document,
159 reporter=document.reporter,
160 language=self.language,
161 title_styles=[],
162 section_level=0,
163 section_bubble_up_kludge=False,
164 inliner=inliner)
165 self.document = document
166 self.attach_observer(document.note_source)
167 self.reporter = self.memo.reporter
168 self.node = document
169 results = StateMachineWS.run(self, input_lines, input_offset,
170 input_source=document['source'])
171 assert results == [], 'RSTStateMachine.run() results should be empty!'
172 self.node = self.memo = None # remove unneeded references
175class NestedStateMachine(StateMachineWS):
177 """
178 StateMachine run from within other StateMachine runs, to parse nested
179 document structures.
180 """
182 def run(self, input_lines, input_offset, memo, node, match_titles=True):
183 """
184 Parse `input_lines` and populate a `docutils.nodes.document` instance.
186 Extend `StateMachineWS.run()`: set up document-wide data.
187 """
188 self.match_titles = match_titles
189 self.memo = memo
190 self.document = memo.document
191 self.attach_observer(self.document.note_source)
192 self.reporter = memo.reporter
193 self.language = memo.language
194 self.node = node
195 results = StateMachineWS.run(self, input_lines, input_offset)
196 assert results == [], ('NestedStateMachine.run() results should be '
197 'empty!')
198 return results
201class RSTState(StateWS):
203 """
204 reStructuredText State superclass.
206 Contains methods used by all State subclasses.
207 """
209 nested_sm = NestedStateMachine
210 nested_sm_cache = []
212 def __init__(self, state_machine, debug=False):
213 self.nested_sm_kwargs = {'state_classes': state_classes,
214 'initial_state': 'Body'}
215 StateWS.__init__(self, state_machine, debug)
217 def runtime_init(self):
218 StateWS.runtime_init(self)
219 memo = self.state_machine.memo
220 self.memo = memo
221 self.reporter = memo.reporter
222 self.inliner = memo.inliner
223 self.document = memo.document
224 self.parent = self.state_machine.node
225 # enable the reporter to determine source and source-line
226 if not hasattr(self.reporter, 'get_source_and_line'):
227 self.reporter.get_source_and_line = self.state_machine.get_source_and_line # noqa:E501
229 def goto_line(self, abs_line_offset):
230 """
231 Jump to input line `abs_line_offset`, ignoring jumps past the end.
232 """
233 try:
234 self.state_machine.goto_line(abs_line_offset)
235 except EOFError:
236 pass
238 def no_match(self, context, transitions):
239 """
240 Override `StateWS.no_match` to generate a system message.
242 This code should never be run.
243 """
244 self.reporter.severe(
245 'Internal error: no transition pattern match. State: "%s"; '
246 'transitions: %s; context: %s; current line: %r.'
247 % (self.__class__.__name__, transitions, context,
248 self.state_machine.line))
249 return context, None, []
251 def bof(self, context):
252 """Called at beginning of file."""
253 return [], []
255 def nested_parse(self, block, input_offset, node, match_titles=False,
256 state_machine_class=None, state_machine_kwargs=None):
257 """
258 Create a new StateMachine rooted at `node` and run it over the input
259 `block`.
260 """
261 use_default = 0
262 if state_machine_class is None:
263 state_machine_class = self.nested_sm
264 use_default += 1
265 if state_machine_kwargs is None:
266 state_machine_kwargs = self.nested_sm_kwargs
267 use_default += 1
268 block_length = len(block)
270 state_machine = None
271 if use_default == 2:
272 try:
273 state_machine = self.nested_sm_cache.pop()
274 except IndexError:
275 pass
276 if not state_machine:
277 state_machine = state_machine_class(debug=self.debug,
278 **state_machine_kwargs)
279 state_machine.run(block, input_offset, memo=self.memo,
280 node=node, match_titles=match_titles)
281 if use_default == 2:
282 self.nested_sm_cache.append(state_machine)
283 else:
284 state_machine.unlink()
285 new_offset = state_machine.abs_line_offset()
286 # No `block.parent` implies disconnected -- lines aren't in sync:
287 if block.parent and (len(block) - block_length) != 0:
288 # Adjustment for block if modified in nested parse:
289 self.state_machine.next_line(len(block) - block_length)
290 return new_offset
292 def nested_list_parse(self, block, input_offset, node, initial_state,
293 blank_finish,
294 blank_finish_state=None,
295 extra_settings={},
296 match_titles=False,
297 state_machine_class=None,
298 state_machine_kwargs=None):
299 """
300 Create a new StateMachine rooted at `node` and run it over the input
301 `block`. Also keep track of optional intermediate blank lines and the
302 required final one.
303 """
304 if state_machine_class is None:
305 state_machine_class = self.nested_sm
306 if state_machine_kwargs is None:
307 state_machine_kwargs = self.nested_sm_kwargs.copy()
308 state_machine_kwargs['initial_state'] = initial_state
309 state_machine = state_machine_class(debug=self.debug,
310 **state_machine_kwargs)
311 if blank_finish_state is None:
312 blank_finish_state = initial_state
313 state_machine.states[blank_finish_state].blank_finish = blank_finish
314 for key, value in extra_settings.items():
315 setattr(state_machine.states[initial_state], key, value)
316 state_machine.run(block, input_offset, memo=self.memo,
317 node=node, match_titles=match_titles)
318 blank_finish = state_machine.states[blank_finish_state].blank_finish
319 state_machine.unlink()
320 return state_machine.abs_line_offset(), blank_finish
322 def section(self, title, source, style, lineno, messages):
323 """Check for a valid subsection and create one if it checks out."""
324 if self.check_subsection(source, style, lineno):
325 self.new_subsection(title, lineno, messages)
327 def check_subsection(self, source, style, lineno):
328 """
329 Check for a valid subsection header. Return True or False.
331 When a new section is reached that isn't a subsection of the current
332 section, back up the line count (use ``previous_line(-x)``), then
333 ``raise EOFError``. The current StateMachine will finish, then the
334 calling StateMachine can re-examine the title. This will work its way
 335 back up the calling chain until the correct section level is reached.
337 @@@ Alternative: Evaluate the title, store the title info & level, and
338 back up the chain until that level is reached. Store in memo? Or
339 return in results?
 341 :Exception: `EOFError` when a sibling or supersection is encountered.
342 """
343 memo = self.memo
344 title_styles = memo.title_styles
345 mylevel = memo.section_level
346 try: # check for existing title style
347 level = title_styles.index(style) + 1
348 except ValueError: # new title style
349 if len(title_styles) == memo.section_level: # new subsection
350 title_styles.append(style)
351 return True
352 else: # not at lowest level
353 self.parent += self.title_inconsistent(source, lineno)
354 return False
355 if level <= mylevel: # sibling or supersection
356 memo.section_level = level # bubble up to parent section
357 if len(style) == 2:
358 memo.section_bubble_up_kludge = True
359 # back up 2 lines for underline title, 3 for overline title
360 self.state_machine.previous_line(len(style) + 1)
361 raise EOFError # let parent section re-evaluate
362 if level == mylevel + 1: # immediate subsection
363 return True
364 else: # invalid subsection
365 self.parent += self.title_inconsistent(source, lineno)
366 return False
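    # Worked illustration (not part of the listed module) of the bookkeeping in
    # `check_subsection`: a style's position in `title_styles` plus one is its
    # section level, and a previously seen style reuses its old level.
    # `_styles_seen` is a made-up stand-in for `memo.title_styles`.
    _styles_seen = []
    _levels = []
    for _style in ['=', '-', '-', '=']:      # underline styles in document order
        if _style not in _styles_seen:
            _styles_seen.append(_style)      # new style: one level deeper
        _levels.append(_styles_seen.index(_style) + 1)
    assert _levels == [1, 2, 2, 1]           # the second '=' bubbles back up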
368 def title_inconsistent(self, sourcetext, lineno):
369 error = self.reporter.severe(
370 'Title level inconsistent:', nodes.literal_block('', sourcetext),
371 line=lineno)
372 return error
374 def new_subsection(self, title, lineno, messages):
375 """Append new subsection to document tree. On return, check level."""
376 memo = self.memo
377 mylevel = memo.section_level
378 memo.section_level += 1
379 section_node = nodes.section()
380 self.parent += section_node
381 textnodes, title_messages = self.inline_text(title, lineno)
382 titlenode = nodes.title(title, '', *textnodes)
383 name = normalize_name(titlenode.astext())
384 section_node['names'].append(name)
385 section_node += titlenode
386 section_node += messages
387 section_node += title_messages
388 self.document.note_implicit_target(section_node, section_node)
389 offset = self.state_machine.line_offset + 1
390 absoffset = self.state_machine.abs_line_offset() + 1
391 newabsoffset = self.nested_parse(
392 self.state_machine.input_lines[offset:], input_offset=absoffset,
393 node=section_node, match_titles=True)
394 self.goto_line(newabsoffset)
395 if memo.section_level <= mylevel: # can't handle next section?
396 raise EOFError # bubble up to supersection
397 # reset section_level; next pass will detect it properly
398 memo.section_level = mylevel
400 def paragraph(self, lines, lineno):
401 """
402 Return a list (paragraph & messages) & a boolean: literal_block next?
403 """
404 data = '\n'.join(lines).rstrip()
405 if re.search(r'(?<!\\)(\\\\)*::$', data):
406 if len(data) == 2:
407 return [], 1
408 elif data[-3] in ' \n':
409 text = data[:-3].rstrip()
410 else:
411 text = data[:-1]
412 literalnext = 1
413 else:
414 text = data
415 literalnext = 0
416 textnodes, messages = self.inline_text(text, lineno)
417 p = nodes.paragraph(data, '', *textnodes)
418 p.source, p.line = self.state_machine.get_source_and_line(lineno)
419 return [p] + messages, literalnext
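    # Illustration (not part of the listed module) of the trailing-"::" rule in
    # `paragraph` above: an unescaped "::" at the end of the text announces a
    # literal block.  "Paragraph::" keeps "Paragraph:", "Paragraph ::" keeps
    # "Paragraph", and a bare "::" produces no paragraph at all.
    for _data in ('Paragraph::', 'Paragraph ::', '::'):
        assert re.search(r'(?<!\\)(\\\\)*::$', _data)
    assert not re.search(r'(?<!\\)(\\\\)*::$', 'No literal block here.')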
421 def inline_text(self, text, lineno):
422 """
423 Return 2 lists: nodes (text and inline elements), and system_messages.
424 """
425 nodes, messages = self.inliner.parse(text, lineno,
426 self.memo, self.parent)
427 return nodes, messages
429 def unindent_warning(self, node_name):
430 # the actual problem is one line below the current line
431 lineno = self.state_machine.abs_line_number() + 1
432 return self.reporter.warning('%s ends without a blank line; '
433 'unexpected unindent.' % node_name,
434 line=lineno)
437def build_regexp(definition, compile=True):
438 """
439 Build, compile and return a regular expression based on `definition`.
441 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
442 where "parts" is a list of regular expressions and/or regular
443 expression definitions to be joined into an or-group.
444 """
445 name, prefix, suffix, parts = definition
446 part_strings = []
447 for part in parts:
448 if isinstance(part, tuple):
449 part_strings.append(build_regexp(part, None))
450 else:
451 part_strings.append(part)
452 or_group = '|'.join(part_strings)
453 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
454 if compile:
455 return re.compile(regexp)
456 else:
457 return regexp
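# Illustrative sketch (not part of the listed module): a small `definition`
# tuple and the pattern `build_regexp` produces from it.  The group names
# 'quote' and 'paren' are made up for the example.
_example_pattern = build_regexp(
    ('quote', r'(?<!\S)', r'(?!\S)',
     [r'"', r"'", ('paren', r'\(', r'\)', [r'[^()]*'])]),
    compile=False)
assert _example_pattern == r'''(?<!\S)(?P<quote>"|'|\((?P<paren>[^()]*)\))(?!\S)'''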
460class Inliner:
462 """
463 Parse inline markup; call the `parse()` method.
464 """
466 def __init__(self):
467 self.implicit_dispatch = []
468 """List of (pattern, bound method) tuples, used by
469 `self.implicit_inline`."""
471 def init_customizations(self, settings):
472 # lookahead and look-behind expressions for inline markup rules
473 if getattr(settings, 'character_level_inline_markup', False):
474 start_string_prefix = '(^|(?<!\x00))'
475 end_string_suffix = ''
476 else:
477 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
478 (punctuation_chars.openers,
479 punctuation_chars.delimiters))
480 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
481 (punctuation_chars.closing_delimiters,
482 punctuation_chars.delimiters,
483 punctuation_chars.closers))
484 args = locals().copy()
485 args.update(vars(self.__class__))
487 parts = ('initial_inline', start_string_prefix, '',
488 [
489 ('start', '', self.non_whitespace_after, # simple start-strings
490 [r'\*\*', # strong
491 r'\*(?!\*)', # emphasis but not strong
492 r'``', # literal
493 r'_`', # inline internal target
494 r'\|(?!\|)'] # substitution reference
495 ),
496 ('whole', '', end_string_suffix, # whole constructs
497 [ # reference name & end-string
498 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
499 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
500 [r'[0-9]+', # manually numbered
501 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)
502 r'\*', # auto-symbol
503 r'(?P<citationlabel>%s)' % self.simplename, # citation ref
504 ]
505 )
506 ]
507 ),
508 ('backquote', # interpreted text or phrase reference
509 '(?P<role>(:%s:)?)' % self.simplename, # optional role
510 self.non_whitespace_after,
511 ['`(?!`)'] # but not literal
512 )
513 ]
514 )
515 self.start_string_prefix = start_string_prefix
516 self.end_string_suffix = end_string_suffix
517 self.parts = parts
519 self.patterns = Struct(
520 initial=build_regexp(parts),
521 emphasis=re.compile(self.non_whitespace_escape_before
522 + r'(\*)' + end_string_suffix),
523 strong=re.compile(self.non_whitespace_escape_before
524 + r'(\*\*)' + end_string_suffix),
525 interpreted_or_phrase_ref=re.compile(
526 r"""
527 %(non_unescaped_whitespace_escape_before)s
528 (
529 `
530 (?P<suffix>
531 (?P<role>:%(simplename)s:)?
532 (?P<refend>__?)?
533 )
534 )
535 %(end_string_suffix)s
536 """ % args, re.VERBOSE),
537 embedded_link=re.compile(
538 r"""
539 (
540 (?:[ \n]+|^) # spaces or beginning of line/string
541 < # open bracket
542 %(non_whitespace_after)s
543 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
544 %(non_whitespace_escape_before)s
545 > # close bracket
546 )
547 $ # end of string
548 """ % args, re.VERBOSE),
549 literal=re.compile(self.non_whitespace_before + '(``)'
550 + end_string_suffix),
551 target=re.compile(self.non_whitespace_escape_before
552 + r'(`)' + end_string_suffix),
553 substitution_ref=re.compile(self.non_whitespace_escape_before
554 + r'(\|_{0,2})'
555 + end_string_suffix),
556 email=re.compile(self.email_pattern % args + '$',
557 re.VERBOSE),
558 uri=re.compile(
559 (r"""
560 %(start_string_prefix)s
561 (?P<whole>
562 (?P<absolute> # absolute URI
563 (?P<scheme> # scheme (http, ftp, mailto)
564 [a-zA-Z][a-zA-Z0-9.+-]*
565 )
566 :
567 (
568 ( # either:
569 (//?)? # hierarchical URI
570 %(uric)s* # URI characters
571 %(uri_end)s # final URI char
572 )
573 ( # optional query
574 \?%(uric)s*
575 %(uri_end)s
576 )?
577 ( # optional fragment
578 \#%(uric)s*
579 %(uri_end)s
580 )?
581 )
582 )
583 | # *OR*
584 (?P<email> # email address
585 """ + self.email_pattern + r"""
586 )
587 )
588 %(end_string_suffix)s
589 """) % args, re.VERBOSE),
590 pep=re.compile(
591 r"""
592 %(start_string_prefix)s
593 (
594 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
595 |
596 (PEP\s+(?P<pepnum2>\d+)) # reference by name
597 )
598 %(end_string_suffix)s""" % args, re.VERBOSE),
599 rfc=re.compile(
600 r"""
601 %(start_string_prefix)s
602 (RFC(-|\s+)?(?P<rfcnum>\d+))
603 %(end_string_suffix)s""" % args, re.VERBOSE))
605 self.implicit_dispatch.append((self.patterns.uri,
606 self.standalone_uri))
607 if settings.pep_references:
608 self.implicit_dispatch.append((self.patterns.pep,
609 self.pep_reference))
610 if settings.rfc_references:
611 self.implicit_dispatch.append((self.patterns.rfc,
612 self.rfc_reference))
614 def parse(self, text, lineno, memo, parent):
615 # Needs to be refactored for nested inline markup.
616 # Add nested_parse() method?
617 """
618 Return 2 lists: nodes (text and inline elements), and system_messages.
620 Using `self.patterns.initial`, a pattern which matches start-strings
621 (emphasis, strong, interpreted, phrase reference, literal,
622 substitution reference, and inline target) and complete constructs
623 (simple reference, footnote reference), search for a candidate. When
624 one is found, check for validity (e.g., not a quoted '*' character).
625 If valid, search for the corresponding end string if applicable, and
626 check it for validity. If not found or invalid, generate a warning
627 and ignore the start-string. Implicit inline markup (e.g. standalone
628 URIs) is found last.
630 :text: source string
631 :lineno: absolute line number (cf. statemachine.get_source_and_line())
632 """
633 self.reporter = memo.reporter
634 self.document = memo.document
635 self.language = memo.language
636 self.parent = parent
637 pattern_search = self.patterns.initial.search
638 dispatch = self.dispatch
639 remaining = escape2null(text)
640 processed = []
641 unprocessed = []
642 messages = []
643 while remaining:
644 match = pattern_search(remaining)
645 if match:
646 groups = match.groupdict()
647 method = dispatch[groups['start'] or groups['backquote']
648 or groups['refend'] or groups['fnend']]
649 before, inlines, remaining, sysmessages = method(self, match,
650 lineno)
651 unprocessed.append(before)
652 messages += sysmessages
653 if inlines:
654 processed += self.implicit_inline(''.join(unprocessed),
655 lineno)
656 processed += inlines
657 unprocessed = []
658 else:
659 break
660 remaining = ''.join(unprocessed) + remaining
661 if remaining:
662 processed += self.implicit_inline(remaining, lineno)
663 return processed, messages
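    # Illustration (not part of the listed module): for the input
    #     see `the docs`_ and **note**
    # `parse` returns a reference node for "the docs", a strong node for
    # "note", and plain Text nodes for the prose around them.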
665 # Inline object recognition
666 # -------------------------
667 # See also init_customizations().
668 non_whitespace_before = r'(?<!\s)'
669 non_whitespace_escape_before = r'(?<![\s\x00])'
670 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
671 non_whitespace_after = r'(?!\s)'
672 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
673 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
674 # Valid URI characters (see RFC 2396 & RFC 2732);
675 # final \x00 allows backslash escapes in URIs:
676 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
677 # Delimiter indicating the end of a URI (not part of the URI):
678 uri_end_delim = r"""[>]"""
679 # Last URI character; same as uric but no punctuation:
680 urilast = r"""[_~*/=+a-zA-Z0-9]"""
681 # End of a URI (either 'urilast' or 'uric followed by a
682 # uri_end_delim'):
683 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
684 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
685 email_pattern = r"""
686 %(emailc)s+(?:\.%(emailc)s+)* # name
687 (?<!\x00)@ # at
688 %(emailc)s+(?:\.%(emailc)s*)* # host
689 %(uri_end)s # final URI char
690 """
692 def quoted_start(self, match):
693 """Test if inline markup start-string is 'quoted'.
695 'Quoted' in this context means the start-string is enclosed in a pair
696 of matching opening/closing delimiters (not necessarily quotes)
697 or at the end of the match.
698 """
699 string = match.string
700 start = match.start()
701 if start == 0: # start-string at beginning of text
702 return False
703 prestart = string[start - 1]
704 try:
705 poststart = string[match.end()]
706 except IndexError: # start-string at end of text
707 return True # not "quoted" but no markup start-string either
708 return punctuation_chars.match_chars(prestart, poststart)
710 def inline_obj(self, match, lineno, end_pattern, nodeclass,
711 restore_backslashes=False):
712 string = match.string
713 matchstart = match.start('start')
714 matchend = match.end('start')
715 if self.quoted_start(match):
716 return string[:matchend], [], string[matchend:], [], ''
717 endmatch = end_pattern.search(string[matchend:])
718 if endmatch and endmatch.start(1): # 1 or more chars
719 text = endmatch.string[:endmatch.start(1)]
720 if restore_backslashes:
721 text = unescape(text, True)
722 textend = matchend + endmatch.end(1)
723 rawsource = unescape(string[matchstart:textend], True)
724 node = nodeclass(rawsource, text)
725 return (string[:matchstart], [node],
726 string[textend:], [], endmatch.group(1))
727 msg = self.reporter.warning(
728 'Inline %s start-string without end-string.'
729 % nodeclass.__name__, line=lineno)
730 text = unescape(string[matchstart:matchend], True)
731 prb = self.problematic(text, text, msg)
732 return string[:matchstart], [prb], string[matchend:], [msg], ''
734 def problematic(self, text, rawsource, message):
735 msgid = self.document.set_id(message, self.parent)
736 problematic = nodes.problematic(rawsource, text, refid=msgid)
737 prbid = self.document.set_id(problematic)
738 message.add_backref(prbid)
739 return problematic
741 def emphasis(self, match, lineno):
742 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
743 match, lineno, self.patterns.emphasis, nodes.emphasis)
744 return before, inlines, remaining, sysmessages
746 def strong(self, match, lineno):
747 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
748 match, lineno, self.patterns.strong, nodes.strong)
749 return before, inlines, remaining, sysmessages
751 def interpreted_or_phrase_ref(self, match, lineno):
752 end_pattern = self.patterns.interpreted_or_phrase_ref
753 string = match.string
754 matchstart = match.start('backquote')
755 matchend = match.end('backquote')
756 rolestart = match.start('role')
757 role = match.group('role')
758 position = ''
759 if role:
760 role = role[1:-1]
761 position = 'prefix'
762 elif self.quoted_start(match):
763 return string[:matchend], [], string[matchend:], []
764 endmatch = end_pattern.search(string[matchend:])
765 if endmatch and endmatch.start(1): # 1 or more chars
766 textend = matchend + endmatch.end()
767 if endmatch.group('role'):
768 if role:
769 msg = self.reporter.warning(
770 'Multiple roles in interpreted text (both '
771 'prefix and suffix present; only one allowed).',
772 line=lineno)
773 text = unescape(string[rolestart:textend], True)
774 prb = self.problematic(text, text, msg)
775 return string[:rolestart], [prb], string[textend:], [msg]
776 role = endmatch.group('suffix')[1:-1]
777 position = 'suffix'
778 escaped = endmatch.string[:endmatch.start(1)]
779 rawsource = unescape(string[matchstart:textend], True)
780 if rawsource[-1:] == '_':
781 if role:
782 msg = self.reporter.warning(
783 'Mismatch: both interpreted text role %s and '
784 'reference suffix.' % position, line=lineno)
785 text = unescape(string[rolestart:textend], True)
786 prb = self.problematic(text, text, msg)
787 return string[:rolestart], [prb], string[textend:], [msg]
788 return self.phrase_ref(string[:matchstart], string[textend:],
789 rawsource, escaped)
790 else:
791 rawsource = unescape(string[rolestart:textend], True)
792 nodelist, messages = self.interpreted(rawsource, escaped, role,
793 lineno)
794 return (string[:rolestart], nodelist,
795 string[textend:], messages)
796 msg = self.reporter.warning(
797 'Inline interpreted text or phrase reference start-string '
798 'without end-string.', line=lineno)
799 text = unescape(string[matchstart:matchend], True)
800 prb = self.problematic(text, text, msg)
801 return string[:matchstart], [prb], string[matchend:], [msg]
803 def phrase_ref(self, before, after, rawsource, escaped, text=None):
804 # `text` is ignored (since 0.16)
805 match = self.patterns.embedded_link.search(escaped)
806 if match: # embedded <URI> or <alias_>
807 text = escaped[:match.start(0)]
808 unescaped = unescape(text)
809 rawtext = unescape(text, True)
810 aliastext = match.group(2)
811 rawaliastext = unescape(aliastext, True)
812 underscore_escaped = rawaliastext.endswith(r'\_')
813 if (aliastext.endswith('_')
814 and not (underscore_escaped
815 or self.patterns.uri.match(aliastext))):
816 aliastype = 'name'
817 alias = normalize_name(unescape(aliastext[:-1]))
818 target = nodes.target(match.group(1), refname=alias)
819 target.indirect_reference_name = whitespace_normalize_name(
820 unescape(aliastext[:-1]))
821 else:
822 aliastype = 'uri'
823 # remove unescaped whitespace
824 alias_parts = split_escaped_whitespace(match.group(2))
825 alias = ' '.join(''.join(part.split())
826 for part in alias_parts)
827 alias = self.adjust_uri(unescape(alias))
828 if alias.endswith(r'\_'):
829 alias = alias[:-2] + '_'
830 target = nodes.target(match.group(1), refuri=alias)
831 target.referenced = 1
832 if not aliastext:
833 raise ApplicationError('problem with embedded link: %r'
834 % aliastext)
835 if not text:
836 text = alias
837 unescaped = unescape(text)
838 rawtext = rawaliastext
839 else:
840 text = escaped
841 unescaped = unescape(text)
842 target = None
843 rawtext = unescape(escaped, True)
845 refname = normalize_name(unescaped)
846 reference = nodes.reference(rawsource, text,
847 name=whitespace_normalize_name(unescaped))
848 reference[0].rawsource = rawtext
850 node_list = [reference]
852 if rawsource[-2:] == '__':
853 if target and (aliastype == 'name'):
854 reference['refname'] = alias
855 self.document.note_refname(reference)
856 # self.document.note_indirect_target(target) # required?
857 elif target and (aliastype == 'uri'):
858 reference['refuri'] = alias
859 else:
860 reference['anonymous'] = 1
861 else:
862 if target:
863 target['names'].append(refname)
864 if aliastype == 'name':
865 reference['refname'] = alias
866 self.document.note_indirect_target(target)
867 self.document.note_refname(reference)
868 else:
869 reference['refuri'] = alias
870 self.document.note_explicit_target(target, self.parent)
871 # target.note_referenced_by(name=refname)
872 node_list.append(target)
873 else:
874 reference['refname'] = refname
875 self.document.note_refname(reference)
876 return before, node_list, after, []
878 def adjust_uri(self, uri):
879 match = self.patterns.email.match(uri)
880 if match:
881 return 'mailto:' + uri
882 else:
883 return uri
885 def interpreted(self, rawsource, text, role, lineno):
886 role_fn, messages = roles.role(role, self.language, lineno,
887 self.reporter)
888 if role_fn:
889 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
890 return nodes, messages + messages2
891 else:
892 msg = self.reporter.error(
893 'Unknown interpreted text role "%s".' % role,
894 line=lineno)
895 return ([self.problematic(rawsource, rawsource, msg)],
896 messages + [msg])
898 def literal(self, match, lineno):
899 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
900 match, lineno, self.patterns.literal, nodes.literal,
901 restore_backslashes=True)
902 return before, inlines, remaining, sysmessages
904 def inline_internal_target(self, match, lineno):
905 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
906 match, lineno, self.patterns.target, nodes.target)
907 if inlines and isinstance(inlines[0], nodes.target):
908 assert len(inlines) == 1
909 target = inlines[0]
910 name = normalize_name(target.astext())
911 target['names'].append(name)
912 self.document.note_explicit_target(target, self.parent)
913 return before, inlines, remaining, sysmessages
915 def substitution_reference(self, match, lineno):
916 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
917 match, lineno, self.patterns.substitution_ref,
918 nodes.substitution_reference)
919 if len(inlines) == 1:
920 subref_node = inlines[0]
921 if isinstance(subref_node, nodes.substitution_reference):
922 subref_text = subref_node.astext()
923 self.document.note_substitution_ref(subref_node, subref_text)
924 if endstring[-1:] == '_':
925 reference_node = nodes.reference(
926 '|%s%s' % (subref_text, endstring), '')
927 if endstring[-2:] == '__':
928 reference_node['anonymous'] = 1
929 else:
930 reference_node['refname'] = normalize_name(subref_text)
931 self.document.note_refname(reference_node)
932 reference_node += subref_node
933 inlines = [reference_node]
934 return before, inlines, remaining, sysmessages
936 def footnote_reference(self, match, lineno):
937 """
938 Handles `nodes.footnote_reference` and `nodes.citation_reference`
939 elements.
940 """
941 label = match.group('footnotelabel')
942 refname = normalize_name(label)
943 string = match.string
944 before = string[:match.start('whole')]
945 remaining = string[match.end('whole'):]
946 if match.group('citationlabel'):
947 refnode = nodes.citation_reference('[%s]_' % label,
948 refname=refname)
949 refnode += nodes.Text(label)
950 self.document.note_citation_ref(refnode)
951 else:
952 refnode = nodes.footnote_reference('[%s]_' % label)
953 if refname[0] == '#':
954 refname = refname[1:]
955 refnode['auto'] = 1
956 self.document.note_autofootnote_ref(refnode)
957 elif refname == '*':
958 refname = ''
959 refnode['auto'] = '*'
960 self.document.note_symbol_footnote_ref(
961 refnode)
962 else:
963 refnode += nodes.Text(label)
964 if refname:
965 refnode['refname'] = refname
966 self.document.note_footnote_ref(refnode)
967 if utils.get_trim_footnote_ref_space(self.document.settings):
968 before = before.rstrip()
969 return before, [refnode], remaining, []
971 def reference(self, match, lineno, anonymous=False):
972 referencename = match.group('refname')
973 refname = normalize_name(referencename)
974 referencenode = nodes.reference(
975 referencename + match.group('refend'), referencename,
976 name=whitespace_normalize_name(referencename))
977 referencenode[0].rawsource = referencename
978 if anonymous:
979 referencenode['anonymous'] = 1
980 else:
981 referencenode['refname'] = refname
982 self.document.note_refname(referencenode)
983 string = match.string
984 matchstart = match.start('whole')
985 matchend = match.end('whole')
986 return string[:matchstart], [referencenode], string[matchend:], []
988 def anonymous_reference(self, match, lineno):
989 return self.reference(match, lineno, anonymous=True)
991 def standalone_uri(self, match, lineno):
992 if (not match.group('scheme')
993 or match.group('scheme').lower() in urischemes.schemes):
994 if match.group('email'):
995 addscheme = 'mailto:'
996 else:
997 addscheme = ''
998 text = match.group('whole')
999 refuri = addscheme + unescape(text)
1000 reference = nodes.reference(unescape(text, True), text,
1001 refuri=refuri)
1002 return [reference]
1003 else: # not a valid scheme
1004 raise MarkupMismatch
1006 def pep_reference(self, match, lineno):
1007 text = match.group(0)
1008 if text.startswith('pep-'):
1009 pepnum = int(unescape(match.group('pepnum1')))
1010 elif text.startswith('PEP'):
1011 pepnum = int(unescape(match.group('pepnum2')))
1012 else:
1013 raise MarkupMismatch
1014 ref = (self.document.settings.pep_base_url
1015 + self.document.settings.pep_file_url_template % pepnum)
1016 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1018 rfc_url = 'rfc%d.html'
1020 def rfc_reference(self, match, lineno):
1021 text = match.group(0)
1022 if text.startswith('RFC'):
1023 rfcnum = int(unescape(match.group('rfcnum')))
1024 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
1025 else:
1026 raise MarkupMismatch
1027 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1029 def implicit_inline(self, text, lineno):
1030 """
1031 Check each of the patterns in `self.implicit_dispatch` for a match,
1032 and dispatch to the stored method for the pattern. Recursively check
1033 the text before and after the match. Return a list of `nodes.Text`
1034 and inline element nodes.
1035 """
1036 if not text:
1037 return []
1038 for pattern, method in self.implicit_dispatch:
1039 match = pattern.search(text)
1040 if match:
1041 try:
1042 # Must recurse on strings before *and* after the match;
1043 # there may be multiple patterns.
1044 return (self.implicit_inline(text[:match.start()], lineno)
1045 + method(match, lineno)
1046 + self.implicit_inline(text[match.end():], lineno))
1047 except MarkupMismatch:
1048 pass
1049 return [nodes.Text(text)]
1051 dispatch = {'*': emphasis,
1052 '**': strong,
1053 '`': interpreted_or_phrase_ref,
1054 '``': literal,
1055 '_`': inline_internal_target,
1056 ']_': footnote_reference,
1057 '|': substitution_reference,
1058 '_': reference,
1059 '__': anonymous_reference}
1062def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1063 return ord(s) - _zero
1066def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1067 return ord(s) - _zero
1070def _lowerroman_to_int(s):
1071 return roman.fromRoman(s.upper())
1074class Body(RSTState):
1076 """
1077 Generic classifier of the first line of a block.
1078 """
1080 double_width_pad_char = tableparser.TableParser.double_width_pad_char
1081 """Padding character for East Asian double-width text."""
1083 enum = Struct()
1084 """Enumerated list parsing information."""
1086 enum.formatinfo = {
1087 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1088 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1089 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1090 enum.formats = enum.formatinfo.keys()
1091 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1092 'lowerroman', 'upperroman'] # ORDERED!
1093 enum.sequencepats = {'arabic': '[0-9]+',
1094 'loweralpha': '[a-z]',
1095 'upperalpha': '[A-Z]',
1096 'lowerroman': '[ivxlcdm]+',
1097 'upperroman': '[IVXLCDM]+'}
1098 enum.converters = {'arabic': int,
1099 'loweralpha': _loweralpha_to_int,
1100 'upperalpha': _upperalpha_to_int,
1101 'lowerroman': _lowerroman_to_int,
1102 'upperroman': roman.fromRoman}
1104 enum.sequenceregexps = {}
1105 for sequence in enum.sequences:
1106 enum.sequenceregexps[sequence] = re.compile(
1107 enum.sequencepats[sequence] + '$')
1109 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1110 """Matches the top (& bottom) of a full table)."""
1112 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1113 """Matches the top of a simple table."""
1115 simple_table_border_pat = re.compile('=+[ =]*$')
1116 """Matches the bottom & header bottom of a simple table."""
1118 pats = {}
1119 """Fragments of patterns used by transitions."""
1121 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1122 pats['alpha'] = '[a-zA-Z]'
1123 pats['alphanum'] = '[a-zA-Z0-9]'
1124 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1125 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1126 '|%(upperroman)s|#)' % enum.sequencepats)
1127 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1128 # @@@ Loosen up the pattern? Allow Unicode?
1129 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1130 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1131 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1132 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1134 for format in enum.formats:
1135 pats[format] = '(?P<%s>%s%s%s)' % (
1136 format, re.escape(enum.formatinfo[format].prefix),
1137 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1139 patterns = {
1140 'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
1141 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1142 'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
1143 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1144 'doctest': r'>>>( +|$)',
1145 'line_block': r'\|( +|$)',
1146 'grid_table_top': grid_table_top_pat,
1147 'simple_table_top': simple_table_top_pat,
1148 'explicit_markup': r'\.\.( +|$)',
1149 'anonymous': r'__( +|$)',
1150 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1151 'text': r''}
1152 initial_transitions = (
1153 'bullet',
1154 'enumerator',
1155 'field_marker',
1156 'option_marker',
1157 'doctest',
1158 'line_block',
1159 'grid_table_top',
1160 'simple_table_top',
1161 'explicit_markup',
1162 'anonymous',
1163 'line',
1164 'text')
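    # Illustration (not part of the listed module): how the transition patterns
    # above classify the first line of a block.  Each pattern is matched against
    # the start of the (de-indented) line by the state machine.
    assert re.match(patterns['bullet'], '- item one')
    assert re.match(patterns['enumerator'], '3. third item')
    assert re.match(patterns['explicit_markup'], '.. note::')
    assert re.match(patterns['doctest'], '>>> 1 + 1')
    assert re.match(patterns['line'], '-----')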
1166 def indent(self, match, context, next_state):
1167 """Block quote."""
1168 (indented, indent, line_offset, blank_finish
1169 ) = self.state_machine.get_indented()
1170 elements = self.block_quote(indented, line_offset)
1171 self.parent += elements
1172 if not blank_finish:
1173 self.parent += self.unindent_warning('Block quote')
1174 return context, next_state, []
1176 def block_quote(self, indented, line_offset):
1177 elements = []
1178 while indented:
1179 blockquote = nodes.block_quote(rawsource='\n'.join(indented))
1180 (blockquote.source, blockquote.line
1181 ) = self.state_machine.get_source_and_line(line_offset+1)
1182 (blockquote_lines,
1183 attribution_lines,
1184 attribution_offset,
1185 indented,
1186 new_line_offset) = self.split_attribution(indented, line_offset)
1187 self.nested_parse(blockquote_lines, line_offset, blockquote)
1188 elements.append(blockquote)
1189 if attribution_lines:
1190 attribution, messages = self.parse_attribution(
1191 attribution_lines, line_offset+attribution_offset)
1192 blockquote += attribution
1193 elements += messages
1194 line_offset = new_line_offset
1195 while indented and not indented[0]:
1196 indented = indented[1:]
1197 line_offset += 1
1198 return elements
1200 # U+2014 is an em-dash:
1201 attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
1203 def split_attribution(self, indented, line_offset):
1204 """
1205 Check for a block quote attribution and split it off:
1207 * First line after a blank line must begin with a dash ("--", "---",
1208 em-dash; matches `self.attribution_pattern`).
1209 * Every line after that must have consistent indentation.
1210 * Attributions must be preceded by block quote content.
1212 Return a tuple of: (block quote content lines, attribution lines,
1213 attribution offset, remaining indented lines, remaining lines offset).
1214 """
1215 blank = None
1216 nonblank_seen = False
1217 for i in range(len(indented)):
1218 line = indented[i].rstrip()
1219 if line:
1220 if nonblank_seen and blank == i - 1: # last line blank
1221 match = self.attribution_pattern.match(line)
1222 if match:
1223 attribution_end, indent = self.check_attribution(
1224 indented, i)
1225 if attribution_end:
1226 a_lines = indented[i:attribution_end]
1227 a_lines.trim_left(match.end(), end=1)
1228 a_lines.trim_left(indent, start=1)
1229 return (indented[:i], a_lines,
1230 i, indented[attribution_end:],
1231 line_offset + attribution_end)
1232 nonblank_seen = True
1233 else:
1234 blank = i
1235 else:
1236 return indented, None, None, None, None
1238 def check_attribution(self, indented, attribution_start):
1239 """
1240 Check attribution shape.
1241 Return the index past the end of the attribution, and the indent.
1242 """
1243 indent = None
1244 i = attribution_start + 1
1245 for i in range(attribution_start + 1, len(indented)):
1246 line = indented[i].rstrip()
1247 if not line:
1248 break
1249 if indent is None:
1250 indent = len(line) - len(line.lstrip())
1251 elif len(line) - len(line.lstrip()) != indent:
1252 return None, None # bad shape; not an attribution
1253 else:
1254 # return index of line after last attribution line:
1255 i += 1
1256 return i, (indent or 0)
1258 def parse_attribution(self, indented, line_offset):
1259 text = '\n'.join(indented).rstrip()
1260 lineno = 1 + line_offset # line_offset is zero-based
1261 textnodes, messages = self.inline_text(text, lineno)
1262 node = nodes.attribution(text, '', *textnodes)
1263 node.source, node.line = self.state_machine.get_source_and_line(lineno)
1264 return node, messages
1266 def bullet(self, match, context, next_state):
1267 """Bullet list item."""
1268 bulletlist = nodes.bullet_list()
1269 (bulletlist.source,
1270 bulletlist.line) = self.state_machine.get_source_and_line()
1271 self.parent += bulletlist
1272 bulletlist['bullet'] = match.string[0]
1273 i, blank_finish = self.list_item(match.end())
1274 bulletlist += i
1275 offset = self.state_machine.line_offset + 1 # next line
1276 new_line_offset, blank_finish = self.nested_list_parse(
1277 self.state_machine.input_lines[offset:],
1278 input_offset=self.state_machine.abs_line_offset() + 1,
1279 node=bulletlist, initial_state='BulletList',
1280 blank_finish=blank_finish)
1281 self.goto_line(new_line_offset)
1282 if not blank_finish:
1283 self.parent += self.unindent_warning('Bullet list')
1284 return [], next_state, []
1286 def list_item(self, indent):
1287 if self.state_machine.line[indent:]:
1288 indented, line_offset, blank_finish = (
1289 self.state_machine.get_known_indented(indent))
1290 else:
1291 indented, indent, line_offset, blank_finish = (
1292 self.state_machine.get_first_known_indented(indent))
1293 listitem = nodes.list_item('\n'.join(indented))
1294 if indented:
1295 self.nested_parse(indented, input_offset=line_offset,
1296 node=listitem)
1297 return listitem, blank_finish
1299 def enumerator(self, match, context, next_state):
1300 """Enumerated List Item"""
1301 format, sequence, text, ordinal = self.parse_enumerator(match)
1302 if not self.is_enumerated_list_item(ordinal, sequence, format):
1303 raise statemachine.TransitionCorrection('text')
1304 enumlist = nodes.enumerated_list()
1305 self.parent += enumlist
1306 if sequence == '#':
1307 enumlist['enumtype'] = 'arabic'
1308 else:
1309 enumlist['enumtype'] = sequence
1310 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1311 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1312 if ordinal != 1:
1313 enumlist['start'] = ordinal
1314 msg = self.reporter.info(
1315 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1316 % (text, ordinal))
1317 self.parent += msg
1318 listitem, blank_finish = self.list_item(match.end())
1319 enumlist += listitem
1320 offset = self.state_machine.line_offset + 1 # next line
1321 newline_offset, blank_finish = self.nested_list_parse(
1322 self.state_machine.input_lines[offset:],
1323 input_offset=self.state_machine.abs_line_offset() + 1,
1324 node=enumlist, initial_state='EnumeratedList',
1325 blank_finish=blank_finish,
1326 extra_settings={'lastordinal': ordinal,
1327 'format': format,
1328 'auto': sequence == '#'})
1329 self.goto_line(newline_offset)
1330 if not blank_finish:
1331 self.parent += self.unindent_warning('Enumerated list')
1332 return [], next_state, []
1334 def parse_enumerator(self, match, expected_sequence=None):
1335 """
1336 Analyze an enumerator and return the results.
1338 :Return:
1339 - the enumerator format ('period', 'parens', or 'rparen'),
1340 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1341 - the text of the enumerator, stripped of formatting, and
1342 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1343 ``None`` is returned for invalid enumerator text).
1345 The enumerator format has already been determined by the regular
1346 expression match. If `expected_sequence` is given, that sequence is
1347 tried first. If not, we check for Roman numeral 1. This way,
1348 single-character Roman numerals (which are also alphabetical) can be
1349 matched. If no sequence has been matched, all sequences are checked in
1350 order.
1351 """
1352 groupdict = match.groupdict()
1353 sequence = ''
1354 for format in self.enum.formats:
1355 if groupdict[format]: # was this the format matched?
1356 break # yes; keep `format`
1357 else: # shouldn't happen
1358 raise ParserError('enumerator format not matched')
1359 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1360 : self.enum.formatinfo[format].end]
1361 if text == '#':
1362 sequence = '#'
1363 elif expected_sequence:
1364 try:
1365 if self.enum.sequenceregexps[expected_sequence].match(text):
1366 sequence = expected_sequence
1367 except KeyError: # shouldn't happen
1368 raise ParserError('unknown enumerator sequence: %s'
1369 % sequence)
1370 elif text == 'i':
1371 sequence = 'lowerroman'
1372 elif text == 'I':
1373 sequence = 'upperroman'
1374 if not sequence:
1375 for sequence in self.enum.sequences:
1376 if self.enum.sequenceregexps[sequence].match(text):
1377 break
1378 else: # shouldn't happen
1379 raise ParserError('enumerator sequence not matched')
1380 if sequence == '#':
1381 ordinal = 1
1382 else:
1383 try:
1384 ordinal = self.enum.converters[sequence](text)
1385 except roman.InvalidRomanNumeralError:
1386 ordinal = None
1387 return format, sequence, text, ordinal
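    # Illustration (not part of the listed module) of `parse_enumerator`: for a
    # marker like "iv)" it reports format='rparen', sequence='lowerroman',
    # text='iv', ordinal=4; for "(B) " it reports 'parens'/'upperalpha'/2.
    # The ordinals come from the converters table above:
    assert enum.converters['lowerroman']('iv') == 4
    assert enum.converters['upperalpha']('B') == 2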
1389 def is_enumerated_list_item(self, ordinal, sequence, format):
1390 """
1391 Check validity based on the ordinal value and the second line.
1393 Return true if the ordinal is valid and the second line is blank,
1394 indented, or starts with the next enumerator or an auto-enumerator.
1395 """
1396 if ordinal is None:
1397 return None
1398 try:
1399 next_line = self.state_machine.next_line()
1400 except EOFError: # end of input lines
1401 self.state_machine.previous_line()
1402 return 1
1403 else:
1404 self.state_machine.previous_line()
1405 if not next_line[:1].strip(): # blank or indented
1406 return 1
1407 result = self.make_enumerator(ordinal + 1, sequence, format)
1408 if result:
1409 next_enumerator, auto_enumerator = result
1410 try:
1411 if (next_line.startswith(next_enumerator)
1412 or next_line.startswith(auto_enumerator)):
1413 return 1
1414 except TypeError:
1415 pass
1416 return None
1418 def make_enumerator(self, ordinal, sequence, format):
1419 """
1420 Construct and return the next enumerated list item marker, and an
1421 auto-enumerator ("#" instead of the regular enumerator).
1423 Return ``None`` for invalid (out of range) ordinals.
1424 """
1425 if sequence == '#':
1426 enumerator = '#'
1427 elif sequence == 'arabic':
1428 enumerator = str(ordinal)
1429 else:
1430 if sequence.endswith('alpha'):
1431 if ordinal > 26:
1432 return None
1433 enumerator = chr(ordinal + ord('a') - 1)
1434 elif sequence.endswith('roman'):
1435 try:
1436 enumerator = roman.toRoman(ordinal)
1437 except roman.RomanError:
1438 return None
1439 else: # shouldn't happen
1440 raise ParserError('unknown enumerator sequence: "%s"'
1441 % sequence)
1442 if sequence.startswith('lower'):
1443 enumerator = enumerator.lower()
1444 elif sequence.startswith('upper'):
1445 enumerator = enumerator.upper()
1446 else: # shouldn't happen
1447 raise ParserError('unknown enumerator sequence: "%s"'
1448 % sequence)
1449 formatinfo = self.enum.formatinfo[format]
1450 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1451 + ' ')
1452 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1453 return next_enumerator, auto_enumerator
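    # Illustration (not part of the listed module): for ordinal 4, sequence
    # 'lowerroman' and format 'parens', `make_enumerator` returns the pair
    # ('(iv) ', '(#) '), the expected next marker plus its auto-enumerator.
    assert roman.toRoman(4).lower() == 'iv'
    assert (enum.formatinfo['parens'].prefix == '('
            and enum.formatinfo['parens'].suffix == ')')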
1455 def field_marker(self, match, context, next_state):
1456 """Field list item."""
1457 field_list = nodes.field_list()
1458 self.parent += field_list
1459 field, blank_finish = self.field(match)
1460 field_list += field
1461 offset = self.state_machine.line_offset + 1 # next line
1462 newline_offset, blank_finish = self.nested_list_parse(
1463 self.state_machine.input_lines[offset:],
1464 input_offset=self.state_machine.abs_line_offset() + 1,
1465 node=field_list, initial_state='FieldList',
1466 blank_finish=blank_finish)
1467 self.goto_line(newline_offset)
1468 if not blank_finish:
1469 self.parent += self.unindent_warning('Field list')
1470 return [], next_state, []
1472 def field(self, match):
1473 name = self.parse_field_marker(match)
1474 src, srcline = self.state_machine.get_source_and_line()
1475 lineno = self.state_machine.abs_line_number()
1476 (indented, indent, line_offset, blank_finish
1477 ) = self.state_machine.get_first_known_indented(match.end())
1478 field_node = nodes.field()
1479 field_node.source = src
1480 field_node.line = srcline
1481 name_nodes, name_messages = self.inline_text(name, lineno)
1482 field_node += nodes.field_name(name, '', *name_nodes)
1483 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1484 field_node += field_body
1485 if indented:
1486 self.parse_field_body(indented, line_offset, field_body)
1487 return field_node, blank_finish
1489 def parse_field_marker(self, match):
1490 """Extract & return field name from a field marker match."""
1491 field = match.group()[1:] # strip off leading ':'
1492 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1493 return field
1495 def parse_field_body(self, indented, offset, node):
1496 self.nested_parse(indented, input_offset=offset, node=node)
1498 def option_marker(self, match, context, next_state):
1499 """Option list item."""
1500 optionlist = nodes.option_list()
1501 (optionlist.source, optionlist.line
1502 ) = self.state_machine.get_source_and_line()
1503 try:
1504 listitem, blank_finish = self.option_list_item(match)
1505 except MarkupError as error:
1506 # This shouldn't happen; pattern won't match.
1507 msg = self.reporter.error('Invalid option list marker: %s'
1508 % error)
1509 self.parent += msg
1510 (indented, indent, line_offset, blank_finish
1511 ) = self.state_machine.get_first_known_indented(match.end())
1512 elements = self.block_quote(indented, line_offset)
1513 self.parent += elements
1514 if not blank_finish:
1515 self.parent += self.unindent_warning('Option list')
1516 return [], next_state, []
1517 self.parent += optionlist
1518 optionlist += listitem
1519 offset = self.state_machine.line_offset + 1 # next line
1520 newline_offset, blank_finish = self.nested_list_parse(
1521 self.state_machine.input_lines[offset:],
1522 input_offset=self.state_machine.abs_line_offset() + 1,
1523 node=optionlist, initial_state='OptionList',
1524 blank_finish=blank_finish)
1525 self.goto_line(newline_offset)
1526 if not blank_finish:
1527 self.parent += self.unindent_warning('Option list')
1528 return [], next_state, []
1530 def option_list_item(self, match):
1531 offset = self.state_machine.abs_line_offset()
1532 options = self.parse_option_marker(match)
1533 (indented, indent, line_offset, blank_finish
1534 ) = self.state_machine.get_first_known_indented(match.end())
1535 if not indented: # not an option list item
1536 self.goto_line(offset)
1537 raise statemachine.TransitionCorrection('text')
1538 option_group = nodes.option_group('', *options)
1539 description = nodes.description('\n'.join(indented))
1540 option_list_item = nodes.option_list_item('', option_group,
1541 description)
1542 if indented:
1543 self.nested_parse(indented, input_offset=line_offset,
1544 node=description)
1545 return option_list_item, blank_finish
1547 def parse_option_marker(self, match):
1548 """
1549 Return a list of `node.option` and `node.option_argument` objects,
1550 parsed from an option marker match.
1552 :Exception: `MarkupError` for invalid option markers.
1553 """
1554 optlist = []
1555 optionstrings = match.group().rstrip().split(', ')
1556 for optionstring in optionstrings:
1557 tokens = optionstring.split()
1558 delimiter = ' '
1559 firstopt = tokens[0].split('=', 1)
1560 if len(firstopt) > 1:
1561 # "--opt=value" form
1562 tokens[:1] = firstopt
1563 delimiter = '='
1564 elif (len(tokens[0]) > 2
1565 and ((tokens[0].startswith('-')
1566 and not tokens[0].startswith('--'))
1567 or tokens[0].startswith('+'))):
1568 # "-ovalue" form
1569 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1570 delimiter = ''
1571 if len(tokens) > 1 and (tokens[1].startswith('<')
1572 and tokens[-1].endswith('>')):
1573 # "-o <value1 value2>" form; join all values into one token
1574 tokens[1:] = [' '.join(tokens[1:])]
1575 if 0 < len(tokens) <= 2:
1576 option = nodes.option(optionstring)
1577 option += nodes.option_string(tokens[0], tokens[0])
1578 if len(tokens) > 1:
1579 option += nodes.option_argument(tokens[1], tokens[1],
1580 delimiter=delimiter)
1581 optlist.append(option)
1582 else:
1583 raise MarkupError(
1584 'wrong number of option tokens (=%s), should be 1 or 2: '
1585 '"%s"' % (len(tokens), optionstring))
1586 return optlist
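# Illustrative option-marker forms handled above (a sketch, not exhaustive):
#
#     -a             short option
#     -b FILE        short option with a separate argument
#     --long         long option
#     --input=FILE   long option; split on the first '=' (delimiter '=')
#     -ovalue        short option with attached value; split after two chars
#
# Each comma-separated marker yields one `nodes.option` element.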
1588 def doctest(self, match, context, next_state):
1589 data = '\n'.join(self.state_machine.get_text_block())
1590 # TODO: prepend class value ['pycon'] (Python Console)
1591 # parse with `directives.body.CodeBlock` (returns literal-block
1592 # with class "code" and syntax highlight markup).
1593 self.parent += nodes.doctest_block(data, data)
1594 return [], next_state, []
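# Illustrative doctest block: an unindented text block starting with ">>>"
# and ending at the next blank line, e.g.:
#
#     >>> 1 + 1
#     2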
1596 def line_block(self, match, context, next_state):
1597 """First line of a line block."""
1598 block = nodes.line_block()
1599 self.parent += block
1600 lineno = self.state_machine.abs_line_number()
1601 line, messages, blank_finish = self.line_block_line(match, lineno)
1602 block += line
1603 self.parent += messages
1604 if not blank_finish:
1605 offset = self.state_machine.line_offset + 1 # next line
1606 new_line_offset, blank_finish = self.nested_list_parse(
1607 self.state_machine.input_lines[offset:],
1608 input_offset=self.state_machine.abs_line_offset() + 1,
1609 node=block, initial_state='LineBlock',
1610 blank_finish=0)
1611 self.goto_line(new_line_offset)
1612 if not blank_finish:
1613 self.parent += self.reporter.warning(
1614 'Line block ends without a blank line.',
1615 line=lineno+1)
1616 if len(block):
1617 if block[0].indent is None:
1618 block[0].indent = 0
1619 self.nest_line_block_lines(block)
1620 return [], next_state, []
1622 def line_block_line(self, match, lineno):
1623 """Return one line element of a line_block."""
1624 (indented, indent, line_offset, blank_finish
1625 ) = self.state_machine.get_first_known_indented(match.end(),
1626 until_blank=True)
1627 text = '\n'.join(indented)
1628 text_nodes, messages = self.inline_text(text, lineno)
1629 line = nodes.line(text, '', *text_nodes)
1630 if match.string.rstrip() != '|': # not empty
1631 line.indent = len(match.group(1)) - 1
1632 return line, messages, blank_finish
1634 def nest_line_block_lines(self, block):
1635 for index in range(1, len(block)):
1636 if getattr(block[index], 'indent', None) is None:
1637 block[index].indent = block[index - 1].indent
1638 self.nest_line_block_segment(block)
1640 def nest_line_block_segment(self, block):
1641 indents = [item.indent for item in block]
1642 least = min(indents)
1643 new_items = []
1644 new_block = nodes.line_block()
1645 for item in block:
1646 if item.indent > least:
1647 new_block.append(item)
1648 else:
1649 if len(new_block):
1650 self.nest_line_block_segment(new_block)
1651 new_items.append(new_block)
1652 new_block = nodes.line_block()
1653 new_items.append(item)
1654 if len(new_block):
1655 self.nest_line_block_segment(new_block)
1656 new_items.append(new_block)
1657 block[:] = new_items
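# Illustrative line block; lines indented further than their neighbors are
# grouped into nested line_block children by nest_line_block_segment():
#
#     | A one-line stanza.
#     |     An indented continuation,
#     |     kept together in a child line_block.
#     | Back at the outer indent.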
1659 def grid_table_top(self, match, context, next_state):
1660 """Top border of a full table."""
1661 return self.table_top(match, context, next_state,
1662 self.isolate_grid_table,
1663 tableparser.GridTableParser)
1665 def simple_table_top(self, match, context, next_state):
1666 """Top border of a simple table."""
1667 return self.table_top(match, context, next_state,
1668 self.isolate_simple_table,
1669 tableparser.SimpleTableParser)
1671 def table_top(self, match, context, next_state,
1672 isolate_function, parser_class):
1673 """Top border of a generic table."""
1674 nodelist, blank_finish = self.table(isolate_function, parser_class)
1675 self.parent += nodelist
1676 if not blank_finish:
1677 msg = self.reporter.warning(
1678 'Blank line required after table.',
1679 line=self.state_machine.abs_line_number()+1)
1680 self.parent += msg
1681 return [], next_state, []
1683 def table(self, isolate_function, parser_class):
1684 """Parse a table."""
1685 block, messages, blank_finish = isolate_function()
1686 if block:
1687 try:
1688 parser = parser_class()
1689 tabledata = parser.parse(block)
1690 tableline = (self.state_machine.abs_line_number() - len(block)
1691 + 1)
1692 table = self.build_table(tabledata, tableline)
1693 nodelist = [table] + messages
1694 except tableparser.TableMarkupError as err:
1695 nodelist = self.malformed_table(block, ' '.join(err.args),
1696 offset=err.offset) + messages
1697 else:
1698 nodelist = messages
1699 return nodelist, blank_finish
1701 def isolate_grid_table(self):
1702 messages = []
1703 blank_finish = 1
1704 try:
1705 block = self.state_machine.get_text_block(flush_left=True)
1706 except statemachine.UnexpectedIndentationError as err:
1707 block, src, srcline = err.args
1708 messages.append(self.reporter.error('Unexpected indentation.',
1709 source=src, line=srcline))
1710 blank_finish = 0
1711 block.disconnect()
1712 # for East Asian chars:
1713 block.pad_double_width(self.double_width_pad_char)
1714 width = len(block[0].strip())
1715 for i in range(len(block)):
1716 block[i] = block[i].strip()
1717 if block[i][0] not in '+|': # check left edge
1718 blank_finish = 0
1719 self.state_machine.previous_line(len(block) - i)
1720 del block[i:]
1721 break
1722 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1723 blank_finish = 0
1724 # from second-last to third line of table:
1725 for i in range(len(block) - 2, 1, -1):
1726 if self.grid_table_top_pat.match(block[i]):
1727 self.state_machine.previous_line(len(block) - i + 1)
1728 del block[i+1:]
1729 break
1730 else:
1731 messages.extend(self.malformed_table(block))
1732 return [], messages, blank_finish
1733 for i in range(len(block)): # check right edge
1734 if len(block[i]) != width or block[i][-1] not in '+|':
1735 messages.extend(self.malformed_table(block))
1736 return [], messages, blank_finish
1737 return block, messages, blank_finish
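# Illustrative grid table; every line must start and end on '+' or '|' and
# match the width of the top border:
#
#     +-------+-------+
#     | head  | head  |
#     +=======+=======+
#     | 1     | 2     |
#     +-------+-------+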
1739 def isolate_simple_table(self):
1740 start = self.state_machine.line_offset
1741 lines = self.state_machine.input_lines
1742 limit = len(lines) - 1
1743 toplen = len(lines[start].strip())
1744 pattern_match = self.simple_table_border_pat.match
1745 found = 0
1746 found_at = None
1747 i = start + 1
1748 while i <= limit:
1749 line = lines[i]
1750 match = pattern_match(line)
1751 if match:
1752 if len(line.strip()) != toplen:
1753 self.state_machine.next_line(i - start)
1754 messages = self.malformed_table(
1755 lines[start:i+1], 'Bottom/header table border does '
1756 'not match top border.')
1757 return [], messages, i == limit or not lines[i+1].strip()
1758 found += 1
1759 found_at = i
1760 if found == 2 or i == limit or not lines[i+1].strip():
1761 end = i
1762 break
1763 i += 1
1764 else: # reached end of input_lines
1765 if found:
1766 extra = ' or no blank line after table bottom'
1767 self.state_machine.next_line(found_at - start)
1768 block = lines[start:found_at+1]
1769 else:
1770 extra = ''
1771 self.state_machine.next_line(i - start - 1)
1772 block = lines[start:]
1773 messages = self.malformed_table(
1774 block, 'No bottom table border found%s.' % extra)
1775 return [], messages, not extra
1776 self.state_machine.next_line(end - start)
1777 block = lines[start:end+1]
1778 # for East Asian chars:
1779 block.pad_double_width(self.double_width_pad_char)
1780 return block, [], end == limit or not lines[end+1].strip()
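# Illustrative simple table; the top, header, and bottom borders must all
# have the same width, and the table should be followed by a blank line:
#
#     =====  =====
#     col A  col B
#     =====  =====
#     1      2
#     =====  =====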
1782 def malformed_table(self, block, detail='', offset=0):
1783 block.replace(self.double_width_pad_char, '')
1784 data = '\n'.join(block)
1785 message = 'Malformed table.'
1786 startline = self.state_machine.abs_line_number() - len(block) + 1
1787 if detail:
1788 message += '\n' + detail
1789 error = self.reporter.error(message, nodes.literal_block(data, data),
1790 line=startline+offset)
1791 return [error]
1793 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
1794 colwidths, headrows, bodyrows = tabledata
1795 table = nodes.table()
1796 if widths == 'auto':
1797 table['classes'] += ['colwidths-auto']
1798 elif widths: # "grid" or list of integers
1799 table['classes'] += ['colwidths-given']
1800 tgroup = nodes.tgroup(cols=len(colwidths))
1801 table += tgroup
1802 for colwidth in colwidths:
1803 colspec = nodes.colspec(colwidth=colwidth)
1804 if stub_columns:
1805 colspec.attributes['stub'] = 1
1806 stub_columns -= 1
1807 tgroup += colspec
1808 if headrows:
1809 thead = nodes.thead()
1810 tgroup += thead
1811 for row in headrows:
1812 thead += self.build_table_row(row, tableline)
1813 tbody = nodes.tbody()
1814 tgroup += tbody
1815 for row in bodyrows:
1816 tbody += self.build_table_row(row, tableline)
1817 return table
1819 def build_table_row(self, rowdata, tableline):
1820 row = nodes.row()
1821 for cell in rowdata:
1822 if cell is None:
1823 continue
1824 morerows, morecols, offset, cellblock = cell
1825 attributes = {}
1826 if morerows:
1827 attributes['morerows'] = morerows
1828 if morecols:
1829 attributes['morecols'] = morecols
1830 entry = nodes.entry(**attributes)
1831 row += entry
1832 if ''.join(cellblock):
1833 self.nested_parse(cellblock, input_offset=tableline+offset,
1834 node=entry)
1835 return row
1837 explicit = Struct()
1838 """Patterns and constants used for explicit markup recognition."""
1840 explicit.patterns = Struct(
1841 target=re.compile(r"""
1842 (
1843 _ # anonymous target
1844 | # *OR*
1845 (?!_) # no underscore at the beginning
1846 (?P<quote>`?) # optional open quote
1847 (?![ `]) # first char. not space or
1848 # backquote
1849 (?P<name> # reference name
1850 .+?
1851 )
1852 %(non_whitespace_escape_before)s
1853 (?P=quote) # close quote if open quote used
1854 )
1855 (?<!(?<!\x00):) # no unescaped colon at end
1856 %(non_whitespace_escape_before)s
1857 [ ]? # optional space
1858 : # end of reference name
1859 ([ ]+|$) # followed by whitespace
1860 """ % vars(Inliner), re.VERBOSE),
1861 reference=re.compile(r"""
1862 (
1863 (?P<simple>%(simplename)s)_
1864 | # *OR*
1865 ` # open backquote
1866 (?![ ]) # not space
1867 (?P<phrase>.+?) # hyperlink phrase
1868 %(non_whitespace_escape_before)s
1869 `_ # close backquote,
1870 # reference mark
1871 )
1872 $ # end of string
1873 """ % vars(Inliner), re.VERBOSE),
1874 substitution=re.compile(r"""
1875 (
1876 (?![ ]) # first char. not space
1877 (?P<name>.+?) # substitution text
1878 %(non_whitespace_escape_before)s
1879 \| # close delimiter
1880 )
1881 ([ ]+|$) # followed by whitespace
1882 """ % vars(Inliner),
1883 re.VERBOSE),)
1885 def footnote(self, match):
1886 src, srcline = self.state_machine.get_source_and_line()
1887 (indented, indent, offset, blank_finish
1888 ) = self.state_machine.get_first_known_indented(match.end())
1889 label = match.group(1)
1890 name = normalize_name(label)
1891 footnote = nodes.footnote('\n'.join(indented))
1892 footnote.source = src
1893 footnote.line = srcline
1894 if name[0] == '#': # auto-numbered
1895 name = name[1:] # autonumber label
1896 footnote['auto'] = 1
1897 if name:
1898 footnote['names'].append(name)
1899 self.document.note_autofootnote(footnote)
1900 elif name == '*': # auto-symbol
1901 name = ''
1902 footnote['auto'] = '*'
1903 self.document.note_symbol_footnote(footnote)
1904 else: # manually numbered
1905 footnote += nodes.label('', label)
1906 footnote['names'].append(name)
1907 self.document.note_footnote(footnote)
1908 if name:
1909 self.document.note_explicit_target(footnote, footnote)
1910 else:
1911 self.document.set_id(footnote, footnote)
1912 if indented:
1913 self.nested_parse(indented, input_offset=offset, node=footnote)
1914 return [footnote], blank_finish
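# Illustrative footnote labels matched by the pattern in explicit.constructs:
#
#     .. [1]      manually numbered
#     .. [#]      anonymous auto-numbered
#     .. [#note]  auto-numbered, with a reference name
#     .. [*]      auto-symbol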
1916 def citation(self, match):
1917 src, srcline = self.state_machine.get_source_and_line()
1918 (indented, indent, offset, blank_finish
1919 ) = self.state_machine.get_first_known_indented(match.end())
1920 label = match.group(1)
1921 name = normalize_name(label)
1922 citation = nodes.citation('\n'.join(indented))
1923 citation.source = src
1924 citation.line = srcline
1925 citation += nodes.label('', label)
1926 citation['names'].append(name)
1927 self.document.note_citation(citation)
1928 self.document.note_explicit_target(citation, citation)
1929 if indented:
1930 self.nested_parse(indented, input_offset=offset, node=citation)
1931 return [citation], blank_finish
1933 def hyperlink_target(self, match):
1934 pattern = self.explicit.patterns.target
1935 lineno = self.state_machine.abs_line_number()
1936 (block, indent, offset, blank_finish
1937 ) = self.state_machine.get_first_known_indented(
1938 match.end(), until_blank=True, strip_indent=False)
1939 blocktext = match.string[:match.end()] + '\n'.join(block)
1940 block = [escape2null(line) for line in block]
1941 escaped = block[0]
1942 blockindex = 0
1943 while True:
1944 targetmatch = pattern.match(escaped)
1945 if targetmatch:
1946 break
1947 blockindex += 1
1948 try:
1949 escaped += block[blockindex]
1950 except IndexError:
1951 raise MarkupError('malformed hyperlink target.')
1952 del block[:blockindex]
1953 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
1954 target = self.make_target(block, blocktext, lineno,
1955 targetmatch.group('name'))
1956 return [target], blank_finish
1958 def make_target(self, block, block_text, lineno, target_name):
1959 target_type, data = self.parse_target(block, block_text, lineno)
1960 if target_type == 'refname':
1961 target = nodes.target(block_text, '', refname=normalize_name(data))
1962 target.indirect_reference_name = data
1963 self.add_target(target_name, '', target, lineno)
1964 self.document.note_indirect_target(target)
1965 return target
1966 elif target_type == 'refuri':
1967 target = nodes.target(block_text, '')
1968 self.add_target(target_name, data, target, lineno)
1969 return target
1970 else:
1971 return data
1973 def parse_target(self, block, block_text, lineno):
1974 """
1975 Determine the type of reference of a target.
1977 :Return: A 2-tuple, one of:
1979 - 'refname' and the indirect reference name
1980 - 'refuri' and the URI
1981 - 'malformed' and a system_message node
1982 """
1983 if block and block[-1].strip()[-1:] == '_': # possible indirect target
1984 reference = ' '.join(line.strip() for line in block)
1985 refname = self.is_reference(reference)
1986 if refname:
1987 return 'refname', refname
1988 ref_parts = split_escaped_whitespace(' '.join(block))
1989 reference = ' '.join(''.join(unescape(part).split())
1990 for part in ref_parts)
1991 return 'refuri', reference
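# Illustrative targets and the type parse_target() assigns to them:
#
#     .. _Docutils: https://docutils.sourceforge.io/    -> 'refuri'
#     .. _alias: Docutils_                              -> 'refname' (indirect)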
1993 def is_reference(self, reference):
1994 match = self.explicit.patterns.reference.match(
1995 whitespace_normalize_name(reference))
1996 if not match:
1997 return None
1998 return unescape(match.group('simple') or match.group('phrase'))
2000 def add_target(self, targetname, refuri, target, lineno):
2001 target.line = lineno
2002 if targetname:
2003 name = normalize_name(unescape(targetname))
2004 target['names'].append(name)
2005 if refuri:
2006 uri = self.inliner.adjust_uri(refuri)
2007 if uri:
2008 target['refuri'] = uri
2009 else:
2010 raise ApplicationError('problem with URI: %r' % refuri)
2011 self.document.note_explicit_target(target, self.parent)
2012 else: # anonymous target
2013 if refuri:
2014 target['refuri'] = refuri
2015 target['anonymous'] = 1
2016 self.document.note_anonymous_target(target)
2018 def substitution_def(self, match):
2019 pattern = self.explicit.patterns.substitution
2020 src, srcline = self.state_machine.get_source_and_line()
2021 (block, indent, offset, blank_finish
2022 ) = self.state_machine.get_first_known_indented(match.end(),
2023 strip_indent=False)
2024 blocktext = (match.string[:match.end()] + '\n'.join(block))
2025 block.disconnect()
2026 escaped = escape2null(block[0].rstrip())
2027 blockindex = 0
2028 while True:
2029 subdefmatch = pattern.match(escaped)
2030 if subdefmatch:
2031 break
2032 blockindex += 1
2033 try:
2034 escaped = escaped + ' ' + escape2null(
2035 block[blockindex].strip())
2036 except IndexError:
2037 raise MarkupError('malformed substitution definition.')
2038 del block[:blockindex] # strip out the substitution marker
2039 start = subdefmatch.end()-len(escaped)-1
2040 block[0] = (block[0].strip() + ' ')[start:-1]
2041 if not block[0]:
2042 del block[0]
2043 offset += 1
2044 while block and not block[-1].strip():
2045 block.pop()
2046 subname = subdefmatch.group('name')
2047 substitution_node = nodes.substitution_definition(blocktext)
2048 substitution_node.source = src
2049 substitution_node.line = srcline
2050 if not block:
2051 msg = self.reporter.warning(
2052 'Substitution definition "%s" missing contents.' % subname,
2053 nodes.literal_block(blocktext, blocktext),
2054 source=src, line=srcline)
2055 return [msg], blank_finish
2056 block[0] = block[0].strip()
2057 substitution_node['names'].append(
2058 nodes.whitespace_normalize_name(subname))
2059 new_abs_offset, blank_finish = self.nested_list_parse(
2060 block, input_offset=offset, node=substitution_node,
2061 initial_state='SubstitutionDef', blank_finish=blank_finish)
2062 i = 0
2063 for node in substitution_node[:]:
2064 if not (isinstance(node, nodes.Inline)
2065 or isinstance(node, nodes.Text)):
2066 self.parent += substitution_node[i]
2067 del substitution_node[i]
2068 else:
2069 i += 1
2070 for node in substitution_node.findall(nodes.Element):
2071 if self.disallowed_inside_substitution_definitions(node):
2072 pformat = nodes.literal_block('', node.pformat().rstrip())
2073 msg = self.reporter.error(
2074 'Substitution definition contains illegal element <%s>:'
2075 % node.tagname,
2076 pformat, nodes.literal_block(blocktext, blocktext),
2077 source=src, line=srcline)
2078 return [msg], blank_finish
2079 if len(substitution_node) == 0:
2080 msg = self.reporter.warning(
2081 'Substitution definition "%s" empty or invalid.' % subname,
2082 nodes.literal_block(blocktext, blocktext),
2083 source=src, line=srcline)
2084 return [msg], blank_finish
2085 self.document.note_substitution_def(
2086 substitution_node, subname, self.parent)
2087 return [substitution_node], blank_finish
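# Illustrative substitution definitions; the embedded directive supplies the
# definition's content:
#
#     .. |project| replace:: Docutils
#     .. |logo| image:: logo.png
#        :alt: project logo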
2089 def disallowed_inside_substitution_definitions(self, node):
2090 if (node['ids']
2091 or isinstance(node, nodes.reference) and node.get('anonymous')
2092 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2093 return True
2094 else:
2095 return False
2097 def directive(self, match, **option_presets):
2098 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2099 type_name = match.group(1)
2100 directive_class, messages = directives.directive(
2101 type_name, self.memo.language, self.document)
2102 self.parent += messages
2103 if directive_class:
2104 return self.run_directive(
2105 directive_class, match, type_name, option_presets)
2106 else:
2107 return self.unknown_directive(type_name)
2109 def run_directive(self, directive, match, type_name, option_presets):
2110 """
2111 Parse a directive then run its directive function.
2113 Parameters:
2115 - `directive`: The class implementing the directive. Must be
2116 a subclass of `rst.Directive`.
2118 - `match`: A regular expression match object which matched the first
2119 line of the directive.
2121 - `type_name`: The directive name, as used in the source text.
2123 - `option_presets`: A dictionary of preset options, defaults for the
2124 directive options. Currently, only an "alt" option is passed by
2125 substitution definitions (value: the substitution name), which may
2126 be used by an embedded image directive.
2128 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2129 """
2130 if isinstance(directive, (FunctionType, MethodType)):
2131 from docutils.parsers.rst import convert_directive_function
2132 directive = convert_directive_function(directive)
2133 lineno = self.state_machine.abs_line_number()
2134 initial_line_offset = self.state_machine.line_offset
2135 (indented, indent, line_offset, blank_finish
2136 ) = self.state_machine.get_first_known_indented(match.end(),
2137 strip_top=0)
2138 block_text = '\n'.join(self.state_machine.input_lines[
2139 initial_line_offset : self.state_machine.line_offset + 1]) # noqa: E203,E501
2140 try:
2141 arguments, options, content, content_offset = (
2142 self.parse_directive_block(indented, line_offset,
2143 directive, option_presets))
2144 except MarkupError as detail:
2145 error = self.reporter.error(
2146 'Error in "%s" directive:\n%s.' % (type_name,
2147 ' '.join(detail.args)),
2148 nodes.literal_block(block_text, block_text), line=lineno)
2149 return [error], blank_finish
2150 directive_instance = directive(
2151 type_name, arguments, options, content, lineno,
2152 content_offset, block_text, self, self.state_machine)
2153 try:
2154 result = directive_instance.run()
2155 except docutils.parsers.rst.DirectiveError as error:
2156 msg_node = self.reporter.system_message(error.level, error.msg,
2157 line=lineno)
2158 msg_node += nodes.literal_block(block_text, block_text)
2159 result = [msg_node]
2160 assert isinstance(result, list), \
2161 'Directive "%s" must return a list of nodes.' % type_name
2162 for i in range(len(result)):
2163 assert isinstance(result[i], nodes.Node), \
2164 ('Directive "%s" returned non-Node object (index %s): %r'
2165 % (type_name, i, result[i]))
2166 return (result,
2167 blank_finish or self.state_machine.is_next_line_blank())
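# Illustrative directive as it appears in source text (a sketch). As noted in
# the docstring above, a substitution definition such as
# ``.. |logo| image:: logo.png`` passes option_presets == {'alt': 'logo'},
# which an embedded image directive may use as a default:
#
#     .. image:: picture.png
#        :width: 200px
#        :alt: alternate text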
2169 def parse_directive_block(self, indented, line_offset, directive,
2170 option_presets):
2171 option_spec = directive.option_spec
2172 has_content = directive.has_content
2173 if indented and not indented[0].strip():
2174 indented.trim_start()
2175 line_offset += 1
2176 while indented and not indented[-1].strip():
2177 indented.trim_end()
2178 if indented and (directive.required_arguments
2179 or directive.optional_arguments
2180 or option_spec):
2181 for i, line in enumerate(indented):
2182 if not line.strip():
2183 break
2184 else:
2185 i += 1
2186 arg_block = indented[:i]
2187 content = indented[i+1:]
2188 content_offset = line_offset + i + 1
2189 else:
2190 content = indented
2191 content_offset = line_offset
2192 arg_block = []
2193 if option_spec:
2194 options, arg_block = self.parse_directive_options(
2195 option_presets, option_spec, arg_block)
2196 else:
2197 options = {}
2198 if arg_block and not (directive.required_arguments
2199 or directive.optional_arguments):
2200 content = arg_block + indented[i:]
2201 content_offset = line_offset
2202 arg_block = []
2203 while content and not content[0].strip():
2204 content.trim_start()
2205 content_offset += 1
2206 if directive.required_arguments or directive.optional_arguments:
2207 arguments = self.parse_directive_arguments(
2208 directive, arg_block)
2209 else:
2210 arguments = []
2211 if content and not has_content:
2212 raise MarkupError('no content permitted')
2213 return arguments, options, content, content_offset
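# Illustrative split performed above: the first "paragraph" of the indented
# block holds arguments and options; everything after the first blank line
# is content (example uses the generic admonition directive):
#
#     .. admonition:: Heads-up        <- argument
#        :class: tip                  <- option
#
#        Body text.                   <- content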
2215 def parse_directive_options(self, option_presets, option_spec, arg_block):
2216 options = option_presets.copy()
2217 for i, line in enumerate(arg_block):
2218 if re.match(Body.patterns['field_marker'], line):
2219 opt_block = arg_block[i:]
2220 arg_block = arg_block[:i]
2221 break
2222 else:
2223 opt_block = []
2224 if opt_block:
2225 success, data = self.parse_extension_options(option_spec,
2226 opt_block)
2227 if success: # data is a dict of options
2228 options.update(data)
2229 else: # data is an error string
2230 raise MarkupError(data)
2231 return options, arg_block
2233 def parse_directive_arguments(self, directive, arg_block):
2234 required = directive.required_arguments
2235 optional = directive.optional_arguments
2236 arg_text = '\n'.join(arg_block)
2237 arguments = arg_text.split()
2238 if len(arguments) < required:
2239 raise MarkupError('%s argument(s) required, %s supplied'
2240 % (required, len(arguments)))
2241 elif len(arguments) > required + optional:
2242 if directive.final_argument_whitespace:
2243 arguments = arg_text.split(None, required + optional - 1)
2244 else:
2245 raise MarkupError(
2246 'maximum %s argument(s) allowed, %s supplied'
2247 % (required + optional, len(arguments)))
2248 return arguments
2250 def parse_extension_options(self, option_spec, datalines):
2251 """
2252 Parse `datalines` for a field list containing extension options
2253 matching `option_spec`.
2255 :Parameters:
2256 - `option_spec`: a mapping of option name to conversion
2257 function, which should raise an exception on bad input.
2258 - `datalines`: a list of input strings.
2260 :Return:
2261 - Success value, 1 or 0.
2262 - An option dictionary on success, an error string on failure.
2263 """
2264 node = nodes.field_list()
2265 newline_offset, blank_finish = self.nested_list_parse(
2266 datalines, 0, node, initial_state='ExtensionOptions',
2267 blank_finish=True)
2268 if newline_offset != len(datalines): # incomplete parse of block
2269 return 0, 'invalid option block'
2270 try:
2271 options = utils.extract_extension_options(node, option_spec)
2272 except KeyError as detail:
2273 return 0, 'unknown option: "%s"' % detail.args[0]
2274 except (ValueError, TypeError) as detail:
2275 return 0, 'invalid option value: %s' % ' '.join(detail.args)
2276 except utils.ExtensionOptionError as detail:
2277 return 0, 'invalid option data: %s' % ' '.join(detail.args)
2278 if blank_finish:
2279 return 1, options
2280 else:
2281 return 0, 'option data incompletely parsed'
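# Illustrative option_spec (a sketch): a mapping from option name to a
# conversion function, e.g. using the standard converters in
# docutils.parsers.rst.directives:
#
#     option_spec = {'class': directives.class_option,
#                    'name': directives.unchanged,
#                    'start-line': directives.nonnegative_int}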
2283 def unknown_directive(self, type_name):
2284 lineno = self.state_machine.abs_line_number()
2285 (indented, indent, offset, blank_finish
2286 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2287 text = '\n'.join(indented)
2288 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2289 nodes.literal_block(text, text),
2290 line=lineno)
2291 return [error], blank_finish
2293 def comment(self, match):
2294 if self.state_machine.is_next_line_blank():
2295 first_comment_line = match.string[match.end():]
2296 if not first_comment_line.strip(): # empty comment
2297 return [nodes.comment()], True # "A tiny but practical wart."
2298 if first_comment_line.startswith('end of inclusion from "'):
2299 # cf. parsers.rst.directives.misc.Include
2300 self.document.include_log.pop()
2301 return [], True
2302 (indented, indent, offset, blank_finish
2303 ) = self.state_machine.get_first_known_indented(match.end())
2304 while indented and not indented[-1].strip():
2305 indented.trim_end()
2306 text = '\n'.join(indented)
2307 return [nodes.comment(text, text)], blank_finish
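# Illustrative comments: ".." followed by text that matches no other explicit
# construct is a comment; a bare ".." is the empty comment:
#
#     .. This is a comment.
#        It may continue on indented lines.
#     ..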
2309 explicit.constructs = [
2310 (footnote,
2311 re.compile(r"""
2312 \.\.[ ]+ # explicit markup start
2313 \[
2314 ( # footnote label:
2315 [0-9]+ # manually numbered footnote
2316 | # *OR*
2317 \# # anonymous auto-numbered footnote
2318 | # *OR*
2319 \#%s # auto-numbered footnote with label
2320 | # *OR*
2321 \* # auto-symbol footnote
2322 )
2323 \]
2324 ([ ]+|$) # whitespace or end of line
2325 """ % Inliner.simplename, re.VERBOSE)),
2326 (citation,
2327 re.compile(r"""
2328 \.\.[ ]+ # explicit markup start
2329 \[(%s)\] # citation label
2330 ([ ]+|$) # whitespace or end of line
2331 """ % Inliner.simplename, re.VERBOSE)),
2332 (hyperlink_target,
2333 re.compile(r"""
2334 \.\.[ ]+ # explicit markup start
2335 _ # target indicator
2336 (?![ ]|$) # first char. not space or EOL
2337 """, re.VERBOSE)),
2338 (substitution_def,
2339 re.compile(r"""
2340 \.\.[ ]+ # explicit markup start
2341 \| # substitution indicator
2342 (?![ ]|$) # first char. not space or EOL
2343 """, re.VERBOSE)),
2344 (directive,
2345 re.compile(r"""
2346 \.\.[ ]+ # explicit markup start
2347 (%s) # directive name
2348 [ ]? # optional space
2349 :: # directive delimiter
2350 ([ ]+|$) # whitespace or end of line
2351 """ % Inliner.simplename, re.VERBOSE))]
2353 def explicit_markup(self, match, context, next_state):
2354 """Footnotes, hyperlink targets, directives, comments."""
2355 nodelist, blank_finish = self.explicit_construct(match)
2356 self.parent += nodelist
2357 self.explicit_list(blank_finish)
2358 return [], next_state, []
2360 def explicit_construct(self, match):
2361 """Determine which explicit construct this is, parse & return it."""
2362 errors = []
2363 for method, pattern in self.explicit.constructs:
2364 expmatch = pattern.match(match.string)
2365 if expmatch:
2366 try:
2367 return method(self, expmatch)
2368 except MarkupError as error:
2369 lineno = self.state_machine.abs_line_number()
2370 message = ' '.join(error.args)
2371 errors.append(self.reporter.warning(message, line=lineno))
2372 break
2373 nodelist, blank_finish = self.comment(match)
2374 return nodelist + errors, blank_finish
2376 def explicit_list(self, blank_finish):
2377 """
2378 Create a nested state machine for a series of explicit markup
2379 constructs (including anonymous hyperlink targets).
2380 """
2381 offset = self.state_machine.line_offset + 1 # next line
2382 newline_offset, blank_finish = self.nested_list_parse(
2383 self.state_machine.input_lines[offset:],
2384 input_offset=self.state_machine.abs_line_offset() + 1,
2385 node=self.parent, initial_state='Explicit',
2386 blank_finish=blank_finish,
2387 match_titles=self.state_machine.match_titles)
2388 self.goto_line(newline_offset)
2389 if not blank_finish:
2390 self.parent += self.unindent_warning('Explicit markup')
2392 def anonymous(self, match, context, next_state):
2393 """Anonymous hyperlink targets."""
2394 nodelist, blank_finish = self.anonymous_target(match)
2395 self.parent += nodelist
2396 self.explicit_list(blank_finish)
2397 return [], next_state, []
2399 def anonymous_target(self, match):
2400 lineno = self.state_machine.abs_line_number()
2401 (block, indent, offset, blank_finish
2402 ) = self.state_machine.get_first_known_indented(match.end(),
2403 until_blank=True)
2404 blocktext = match.string[:match.end()] + '\n'.join(block)
2405 block = [escape2null(line) for line in block]
2406 target = self.make_target(block, blocktext, lineno, '')
2407 return [target], blank_finish
2409 def line(self, match, context, next_state):
2410 """Section title overline or transition marker."""
2411 if self.state_machine.match_titles:
2412 return [match.string], 'Line', []
2413 elif match.string.strip() == '::':
2414 raise statemachine.TransitionCorrection('text')
2415 elif len(match.string.strip()) < 4:
2416 msg = self.reporter.info(
2417 'Unexpected possible title overline or transition.\n'
2418 "Treating it as ordinary text because it's so short.",
2419 line=self.state_machine.abs_line_number())
2420 self.parent += msg
2421 raise statemachine.TransitionCorrection('text')
2422 else:
2423 blocktext = self.state_machine.line
2424 msg = self.reporter.severe(
2425 'Unexpected section title or transition.',
2426 nodes.literal_block(blocktext, blocktext),
2427 line=self.state_machine.abs_line_number())
2428 self.parent += msg
2429 return [], next_state, []
2431 def text(self, match, context, next_state):
2432 """Titles, definition lists, paragraphs."""
2433 return [match.string], 'Text', []
2436class RFC2822Body(Body):
2438 """
2439 RFC2822 headers are only valid as the first constructs in documents. As
2440 soon as anything else appears, the `Body` state should take over.
2441 """
2443 patterns = Body.patterns.copy() # can't modify the original
2444 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2445 initial_transitions = [(name, 'Body')
2446 for name in Body.initial_transitions]
2447 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2449 def rfc2822(self, match, context, next_state):
2450 """RFC2822-style field list item."""
2451 fieldlist = nodes.field_list(classes=['rfc2822'])
2452 self.parent += fieldlist
2453 field, blank_finish = self.rfc2822_field(match)
2454 fieldlist += field
2455 offset = self.state_machine.line_offset + 1 # next line
2456 newline_offset, blank_finish = self.nested_list_parse(
2457 self.state_machine.input_lines[offset:],
2458 input_offset=self.state_machine.abs_line_offset() + 1,
2459 node=fieldlist, initial_state='RFC2822List',
2460 blank_finish=blank_finish)
2461 self.goto_line(newline_offset)
2462 if not blank_finish:
2463 self.parent += self.unindent_warning(
2464 'RFC2822-style field list')
2465 return [], next_state, []
2467 def rfc2822_field(self, match):
2468 name = match.string[:match.string.find(':')]
2469 (indented, indent, line_offset, blank_finish
2470 ) = self.state_machine.get_first_known_indented(match.end(),
2471 until_blank=True)
2472 fieldnode = nodes.field()
2473 fieldnode += nodes.field_name(name, name)
2474 fieldbody = nodes.field_body('\n'.join(indented))
2475 fieldnode += fieldbody
2476 if indented:
2477 self.nested_parse(indented, input_offset=line_offset,
2478 node=fieldbody)
2479 return fieldnode, blank_finish
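# Illustrative RFC2822-style header block (a sketch; recognized when the
# parser starts in this state, e.g. for PEP-style documents):
#
#     Author: A. Writer
#     Status: Draft
#     Content-Type: text/x-rst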
2482class SpecializedBody(Body):
2484 """
2485 Superclass for second and subsequent compound element members. Compound
2486 elements are lists and list-like constructs.
2488 All transition methods are disabled (redefined as `invalid_input`).
2489 Override individual methods in subclasses to re-enable.
2491 For example, once an initial bullet list item, say, is recognized, the
2492 `BulletList` subclass takes over, with a "bullet_list" node as its
2493 container. Upon encountering the initial bullet list item, `Body.bullet`
2494 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2495 starts up a nested parsing session with `BulletList` as the initial state.
2496 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2497 as only bullet list items are encountered, they are parsed and inserted
2498 into the container. The first construct which is *not* a bullet list item
2499 triggers the `invalid_input` method, which ends the nested parse and
2500 closes the container. `BulletList` needs to recognize input that is
2501 invalid in the context of a bullet list, which means everything *other
2502 than* bullet list items, so it inherits the transition list created in
2503 `Body`.
2504 """
2506 def invalid_input(self, match=None, context=None, next_state=None):
2507 """Not a compound element member. Abort this state machine."""
2508 self.state_machine.previous_line() # back up so parent SM can reassess
2509 raise EOFError
2511 indent = invalid_input
2512 bullet = invalid_input
2513 enumerator = invalid_input
2514 field_marker = invalid_input
2515 option_marker = invalid_input
2516 doctest = invalid_input
2517 line_block = invalid_input
2518 grid_table_top = invalid_input
2519 simple_table_top = invalid_input
2520 explicit_markup = invalid_input
2521 anonymous = invalid_input
2522 line = invalid_input
2523 text = invalid_input
2526class BulletList(SpecializedBody):
2528 """Second and subsequent bullet_list list_items."""
2530 def bullet(self, match, context, next_state):
2531 """Bullet list item."""
2532 if match.string[0] != self.parent['bullet']:
2533 # different bullet: new list
2534 self.invalid_input()
2535 listitem, blank_finish = self.list_item(match.end())
2536 self.parent += listitem
2537 self.blank_finish = blank_finish
2538 return [], next_state, []
2541class DefinitionList(SpecializedBody):
2543 """Second and subsequent definition_list_items."""
2545 def text(self, match, context, next_state):
2546 """Definition lists."""
2547 return [match.string], 'Definition', []
2550class EnumeratedList(SpecializedBody):
2552 """Second and subsequent enumerated_list list_items."""
2554 def enumerator(self, match, context, next_state):
2555 """Enumerated list item."""
2556 format, sequence, text, ordinal = self.parse_enumerator(
2557 match, self.parent['enumtype'])
2558 if (format != self.format
2559 or (sequence != '#' and (sequence != self.parent['enumtype']
2560 or self.auto
2561 or ordinal != (self.lastordinal + 1)))
2562 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2563 # different enumeration: new list
2564 self.invalid_input()
2565 if sequence == '#':
2566 self.auto = 1
2567 listitem, blank_finish = self.list_item(match.end())
2568 self.parent += listitem
2569 self.blank_finish = blank_finish
2570 self.lastordinal = ordinal
2571 return [], next_state, []
2574class FieldList(SpecializedBody):
2576 """Second and subsequent field_list fields."""
2578 def field_marker(self, match, context, next_state):
2579 """Field list field."""
2580 field, blank_finish = self.field(match)
2581 self.parent += field
2582 self.blank_finish = blank_finish
2583 return [], next_state, []
2586class OptionList(SpecializedBody):
2588 """Second and subsequent option_list option_list_items."""
2590 def option_marker(self, match, context, next_state):
2591 """Option list item."""
2592 try:
2593 option_list_item, blank_finish = self.option_list_item(match)
2594 except MarkupError:
2595 self.invalid_input()
2596 self.parent += option_list_item
2597 self.blank_finish = blank_finish
2598 return [], next_state, []
2601class RFC2822List(SpecializedBody, RFC2822Body):
2603 """Second and subsequent RFC2822-style field_list fields."""
2605 patterns = RFC2822Body.patterns
2606 initial_transitions = RFC2822Body.initial_transitions
2608 def rfc2822(self, match, context, next_state):
2609 """RFC2822-style field list item."""
2610 field, blank_finish = self.rfc2822_field(match)
2611 self.parent += field
2612 self.blank_finish = blank_finish
2613 return [], 'RFC2822List', []
2615 blank = SpecializedBody.invalid_input
2618class ExtensionOptions(FieldList):
2620 """
2621 Parse field_list fields for extension options.
2623 No nested parsing is done (including inline markup parsing).
2624 """
2626 def parse_field_body(self, indented, offset, node):
2627 """Override `Body.parse_field_body` for simpler parsing."""
2628 lines = []
2629 for line in list(indented) + ['']:
2630 if line.strip():
2631 lines.append(line)
2632 elif lines:
2633 text = '\n'.join(lines)
2634 node += nodes.paragraph(text, text)
2635 lines = []
2638class LineBlock(SpecializedBody):
2640 """Second and subsequent lines of a line_block."""
2642 blank = SpecializedBody.invalid_input
2644 def line_block(self, match, context, next_state):
2645 """New line of line block."""
2646 lineno = self.state_machine.abs_line_number()
2647 line, messages, blank_finish = self.line_block_line(match, lineno)
2648 self.parent += line
2649 self.parent.parent += messages
2650 self.blank_finish = blank_finish
2651 return [], next_state, []
2654class Explicit(SpecializedBody):
2656 """Second and subsequent explicit markup construct."""
2658 def explicit_markup(self, match, context, next_state):
2659 """Footnotes, hyperlink targets, directives, comments."""
2660 nodelist, blank_finish = self.explicit_construct(match)
2661 self.parent += nodelist
2662 self.blank_finish = blank_finish
2663 return [], next_state, []
2665 def anonymous(self, match, context, next_state):
2666 """Anonymous hyperlink targets."""
2667 nodelist, blank_finish = self.anonymous_target(match)
2668 self.parent += nodelist
2669 self.blank_finish = blank_finish
2670 return [], next_state, []
2672 blank = SpecializedBody.invalid_input
2675class SubstitutionDef(Body):
2677 """
2678 Parser for the contents of a substitution_definition element.
2679 """
2681 patterns = {
2682 'embedded_directive': re.compile(r'(%s)::( +|$)'
2683 % Inliner.simplename),
2684 'text': r''}
2685 initial_transitions = ['embedded_directive', 'text']
2687 def embedded_directive(self, match, context, next_state):
2688 nodelist, blank_finish = self.directive(match,
2689 alt=self.parent['names'][0])
2690 self.parent += nodelist
2691 if not self.state_machine.at_eof():
2692 self.blank_finish = blank_finish
2693 raise EOFError
2695 def text(self, match, context, next_state):
2696 if not self.state_machine.at_eof():
2697 self.blank_finish = self.state_machine.is_next_line_blank()
2698 raise EOFError
2701class Text(RSTState):
2703 """
2704 Classifier of second line of a text block.
2706 Could be a paragraph, a definition list item, or a title.
2707 """
2709 patterns = {'underline': Body.patterns['line'],
2710 'text': r''}
2711 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2713 def blank(self, match, context, next_state):
2714 """End of paragraph."""
2715 # NOTE: self.paragraph returns [node, system_message(s)], literalnext
2716 paragraph, literalnext = self.paragraph(
2717 context, self.state_machine.abs_line_number() - 1)
2718 self.parent += paragraph
2719 if literalnext:
2720 self.parent += self.literal_block()
2721 return [], 'Body', []
2723 def eof(self, context):
2724 if context:
2725 self.blank(None, context, None)
2726 return []
2728 def indent(self, match, context, next_state):
2729 """Definition list item."""
2730 definitionlist = nodes.definition_list()
2731 definitionlistitem, blank_finish = self.definition_list_item(context)
2732 definitionlist += definitionlistitem
2733 self.parent += definitionlist
2734 offset = self.state_machine.line_offset + 1 # next line
2735 newline_offset, blank_finish = self.nested_list_parse(
2736 self.state_machine.input_lines[offset:],
2737 input_offset=self.state_machine.abs_line_offset() + 1,
2738 node=definitionlist, initial_state='DefinitionList',
2739 blank_finish=blank_finish, blank_finish_state='Definition')
2740 self.goto_line(newline_offset)
2741 if not blank_finish:
2742 self.parent += self.unindent_warning('Definition list')
2743 return [], 'Body', []
2745 def underline(self, match, context, next_state):
2746 """Section title."""
2747 lineno = self.state_machine.abs_line_number()
2748 title = context[0].rstrip()
2749 underline = match.string.rstrip()
2750 source = title + '\n' + underline
2751 messages = []
2752 if column_width(title) > len(underline):
2753 if len(underline) < 4:
2754 if self.state_machine.match_titles:
2755 msg = self.reporter.info(
2756 'Possible title underline, too short for the title.\n'
2757 "Treating it as ordinary text because it's so short.",
2758 line=lineno)
2759 self.parent += msg
2760 raise statemachine.TransitionCorrection('text')
2761 else:
2762 blocktext = context[0] + '\n' + self.state_machine.line
2763 msg = self.reporter.warning(
2764 'Title underline too short.',
2765 nodes.literal_block(blocktext, blocktext),
2766 line=lineno)
2767 messages.append(msg)
2768 if not self.state_machine.match_titles:
2769 blocktext = context[0] + '\n' + self.state_machine.line
2770 # We need get_source_and_line() here to report correctly
2771 src, srcline = self.state_machine.get_source_and_line()
2772 # TODO: why is abs_line_number() == srcline+1
2773 # if the error is in a table (try with test_tables.py)?
2774 # print("get_source_and_line", srcline)
2775 # print("abs_line_number", self.state_machine.abs_line_number())
2776 msg = self.reporter.severe(
2777 'Unexpected section title.',
2778 nodes.literal_block(blocktext, blocktext),
2779 source=src, line=srcline)
2780 self.parent += messages
2781 self.parent += msg
2782 return [], next_state, []
2783 style = underline[0]
2784 context[:] = []
2785 self.section(title, source, style, lineno - 1, messages)
2786 return [], next_state, []
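# Illustrative underlined title; the underline should be at least as long as
# the title, otherwise the warning above is emitted (or, if shorter than four
# characters, the line is treated as ordinary text):
#
#     Section Title
#     =============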
2788 def text(self, match, context, next_state):
2789 """Paragraph."""
2790 startline = self.state_machine.abs_line_number() - 1
2791 msg = None
2792 try:
2793 block = self.state_machine.get_text_block(flush_left=True)
2794 except statemachine.UnexpectedIndentationError as err:
2795 block, src, srcline = err.args
2796 msg = self.reporter.error('Unexpected indentation.',
2797 source=src, line=srcline)
2798 lines = context + list(block)
2799 paragraph, literalnext = self.paragraph(lines, startline)
2800 self.parent += paragraph
2801 self.parent += msg
2802 if literalnext:
2803 try:
2804 self.state_machine.next_line()
2805 except EOFError:
2806 pass
2807 self.parent += self.literal_block()
2808 return [], next_state, []
2810 def literal_block(self):
2811 """Return a list of nodes."""
2812 (indented, indent, offset, blank_finish
2813 ) = self.state_machine.get_indented()
2814 while indented and not indented[-1].strip():
2815 indented.trim_end()
2816 if not indented:
2817 return self.quoted_literal_block()
2818 data = '\n'.join(indented)
2819 literal_block = nodes.literal_block(data, data)
2820 (literal_block.source,
2821 literal_block.line) = self.state_machine.get_source_and_line(offset+1)
2822 nodelist = [literal_block]
2823 if not blank_finish:
2824 nodelist.append(self.unindent_warning('Literal block'))
2825 return nodelist
2827 def quoted_literal_block(self):
2828 abs_line_offset = self.state_machine.abs_line_offset()
2829 offset = self.state_machine.line_offset
2830 parent_node = nodes.Element()
2831 new_abs_offset = self.nested_parse(
2832 self.state_machine.input_lines[offset:],
2833 input_offset=abs_line_offset, node=parent_node, match_titles=False,
2834 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2835 'initial_state': 'QuotedLiteralBlock'})
2836 self.goto_line(new_abs_offset)
2837 return parent_node.children
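# Illustrative quoted (unindented) literal block: after a "::" paragraph with
# no indented block, adjacent lines beginning with the same initial
# punctuation character are collected verbatim:
#
#     The paragraph ends with ::
#
#     > quoted literal line
#     > another line, same quote character required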
2839 def definition_list_item(self, termline):
2840 (indented, indent, line_offset, blank_finish
2841 ) = self.state_machine.get_indented()
2842 itemnode = nodes.definition_list_item(
2843 '\n'.join(termline + list(indented)))
2844 lineno = self.state_machine.abs_line_number() - 1
2845 (itemnode.source,
2846 itemnode.line) = self.state_machine.get_source_and_line(lineno)
2847 termlist, messages = self.term(termline, lineno)
2848 itemnode += termlist
2849 definition = nodes.definition('', *messages)
2850 itemnode += definition
2851 if termline[0][-2:] == '::':
2852 definition += self.reporter.info(
2853 'Blank line missing before literal block (after the "::")? '
2854 'Interpreted as a definition list item.',
2855 line=lineno+1)
2856 self.nested_parse(indented, input_offset=line_offset, node=definition)
2857 return itemnode, blank_finish
2859 classifier_delimiter = re.compile(' +: +')
2861 def term(self, lines, lineno):
2862 """Return a definition_list's term and optional classifiers."""
2863 assert len(lines) == 1
2864 text_nodes, messages = self.inline_text(lines[0], lineno)
2865 term_node = nodes.term(lines[0])
2866 (term_node.source,
2867 term_node.line) = self.state_machine.get_source_and_line(lineno)
2868 node_list = [term_node]
2869 for i in range(len(text_nodes)):
2870 node = text_nodes[i]
2871 if isinstance(node, nodes.Text):
2872 parts = self.classifier_delimiter.split(node)
2873 if len(parts) == 1:
2874 node_list[-1] += node
2875 else:
2876 text = parts[0].rstrip()
2877 textnode = nodes.Text(text)
2878 node_list[-1] += textnode
2879 for part in parts[1:]:
2880 node_list.append(
2881 nodes.classifier(unescape(part, True), part))
2882 else:
2883 node_list[-1] += node
2884 return node_list, messages
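# Illustrative definition-list term with classifiers; the " : " delimiter
# (space, colon, space) separates the term from each classifier:
#
#     term : classifier one : classifier two
#         The indented definition body.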
2887class SpecializedText(Text):
2889 """
2890 Superclass for second and subsequent lines of Text-variants.
2892 All transition methods are disabled. Override individual methods in
2893 subclasses to re-enable.
2894 """
2896 def eof(self, context):
2897 """Incomplete construct."""
2898 return []
2900 def invalid_input(self, match=None, context=None, next_state=None):
2901 """Not a compound element member. Abort this state machine."""
2902 raise EOFError
2904 blank = invalid_input
2905 indent = invalid_input
2906 underline = invalid_input
2907 text = invalid_input
2910class Definition(SpecializedText):
2912 """Second line of potential definition_list_item."""
2914 def eof(self, context):
2915 """Not a definition."""
2916 self.state_machine.previous_line(2) # so parent SM can reassess
2917 return []
2919 def indent(self, match, context, next_state):
2920 """Definition list item."""
2921 itemnode, blank_finish = self.definition_list_item(context)
2922 self.parent += itemnode
2923 self.blank_finish = blank_finish
2924 return [], 'DefinitionList', []
2927class Line(SpecializedText):
2929 """
2930 Second line of over- & underlined section title or transition marker.
2931 """
2933 eofcheck = 1 # @@@ ???
2934 """Set to 0 while parsing sections, so that we don't catch the EOF."""
2936 def eof(self, context):
2937 """Transition marker at end of section or document."""
2938 marker = context[0].strip()
2939 if self.memo.section_bubble_up_kludge:
2940 self.memo.section_bubble_up_kludge = False
2941 elif len(marker) < 4:
2942 self.state_correction(context)
2943 if self.eofcheck: # ignore EOFError with sections
2944 src, srcline = self.state_machine.get_source_and_line()
2945 # lineno = self.state_machine.abs_line_number() - 1
2946 transition = nodes.transition(rawsource=context[0])
2947 transition.source = src
2948 transition.line = srcline - 1
2949 # transition.line = lineno
2950 self.parent += transition
2951 self.eofcheck = 1
2952 return []
2954 def blank(self, match, context, next_state):
2955 """Transition marker."""
2956 src, srcline = self.state_machine.get_source_and_line()
2957 marker = context[0].strip()
2958 if len(marker) < 4:
2959 self.state_correction(context)
2960 transition = nodes.transition(rawsource=marker)
2961 transition.source = src
2962 transition.line = srcline - 1
2963 self.parent += transition
2964 return [], 'Body', []
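# Illustrative transition marker: a line of four or more repeated punctuation
# characters, preceded and followed by blank lines:
#
#     A paragraph.
#
#     ----------
#
#     Another paragraph.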
2966 def text(self, match, context, next_state):
2967 """Potential over- & underlined title."""
2968 lineno = self.state_machine.abs_line_number() - 1
2969 overline = context[0]
2970 title = match.string
2971 underline = ''
2972 try:
2973 underline = self.state_machine.next_line()
2974 except EOFError:
2975 blocktext = overline + '\n' + title
2976 if len(overline.rstrip()) < 4:
2977 self.short_overline(context, blocktext, lineno, 2)
2978 else:
2979 msg = self.reporter.severe(
2980 'Incomplete section title.',
2981 nodes.literal_block(blocktext, blocktext),
2982 line=lineno)
2983 self.parent += msg
2984 return [], 'Body', []
2985 source = '%s\n%s\n%s' % (overline, title, underline)
2986 overline = overline.rstrip()
2987 underline = underline.rstrip()
2988 if not self.transitions['underline'][0].match(underline):
2989 blocktext = overline + '\n' + title + '\n' + underline
2990 if len(overline.rstrip()) < 4:
2991 self.short_overline(context, blocktext, lineno, 2)
2992 else:
2993 msg = self.reporter.severe(
2994 'Missing matching underline for section title overline.',
2995 nodes.literal_block(source, source),
2996 line=lineno)
2997 self.parent += msg
2998 return [], 'Body', []
2999 elif overline != underline:
3000 blocktext = overline + '\n' + title + '\n' + underline
3001 if len(overline.rstrip()) < 4:
3002 self.short_overline(context, blocktext, lineno, 2)
3003 else:
3004 msg = self.reporter.severe(
3005 'Title overline & underline mismatch.',
3006 nodes.literal_block(source, source),
3007 line=lineno)
3008 self.parent += msg
3009 return [], 'Body', []
3010 title = title.rstrip()
3011 messages = []
3012 if column_width(title) > len(overline):
3013 blocktext = overline + '\n' + title + '\n' + underline
3014 if len(overline.rstrip()) < 4:
3015 self.short_overline(context, blocktext, lineno, 2)
3016 else:
3017 msg = self.reporter.warning(
3018 'Title overline too short.',
3019 nodes.literal_block(source, source),
3020 line=lineno)
3021 messages.append(msg)
3022 style = (overline[0], underline[0])
3023 self.eofcheck = 0 # @@@ not sure this is correct
3024 self.section(title.lstrip(), source, style, lineno + 1, messages)
3025 self.eofcheck = 1
3026 return [], 'Body', []
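# Illustrative over- and underlined section title; overline and underline
# must match and should be at least as long as the title text:
#
#     =========
#      Section
#     =========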
3028 indent = text # indented title
3030 def underline(self, match, context, next_state):
3031 overline = context[0]
3032 blocktext = overline + '\n' + self.state_machine.line
3033 lineno = self.state_machine.abs_line_number() - 1
3034 if len(overline.rstrip()) < 4:
3035 self.short_overline(context, blocktext, lineno, 1)
3036 msg = self.reporter.error(
3037 'Invalid section title or transition marker.',
3038 nodes.literal_block(blocktext, blocktext),
3039 line=lineno)
3040 self.parent += msg
3041 return [], 'Body', []
3043 def short_overline(self, context, blocktext, lineno, lines=1):
3044 msg = self.reporter.info(
3045 'Possible incomplete section title.\nTreating the overline as '
3046 "ordinary text because it's so short.",
3047 line=lineno)
3048 self.parent += msg
3049 self.state_correction(context, lines)
3051 def state_correction(self, context, lines=1):
3052 self.state_machine.previous_line(lines)
3053 context[:] = []
3054 raise statemachine.StateCorrection('Body', 'text')
3057class QuotedLiteralBlock(RSTState):
3059 """
3060 Nested parse handler for quoted (unindented) literal blocks.
3062 Special-purpose. Not for inclusion in `state_classes`.
3063 """
3065 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
3066 'text': r''}
3067 initial_transitions = ('initial_quoted', 'text')
3069 def __init__(self, state_machine, debug=False):
3070 RSTState.__init__(self, state_machine, debug)
3071 self.messages = []
3072 self.initial_lineno = None
3074 def blank(self, match, context, next_state):
3075 if context:
3076 raise EOFError
3077 else:
3078 return context, next_state, []
3080 def eof(self, context):
3081 if context:
3082 src, srcline = self.state_machine.get_source_and_line(
3083 self.initial_lineno)
3084 text = '\n'.join(context)
3085 literal_block = nodes.literal_block(text, text)
3086 literal_block.source = src
3087 literal_block.line = srcline
3088 self.parent += literal_block
3089 else:
3090 self.parent += self.reporter.warning(
3091 'Literal block expected; none found.',
3092 line=self.state_machine.abs_line_number()
3093 ) # src not available, statemachine.input_lines is empty
3094 self.state_machine.previous_line()
3095 self.parent += self.messages
3096 return []
3098 def indent(self, match, context, next_state):
3099 assert context, ('QuotedLiteralBlock.indent: context should not '
3100 'be empty!')
3101 self.messages.append(
3102 self.reporter.error('Unexpected indentation.',
3103 line=self.state_machine.abs_line_number()))
3104 self.state_machine.previous_line()
3105 raise EOFError
3107 def initial_quoted(self, match, context, next_state):
3108 """Match arbitrary quote character on the first line only."""
3109 self.remove_transition('initial_quoted')
3110 quote = match.string[0]
3111 pattern = re.compile(re.escape(quote))
3112 # New transition matches consistent quotes only:
3113 self.add_transition('quoted',
3114 (pattern, self.quoted, self.__class__.__name__))
3115 self.initial_lineno = self.state_machine.abs_line_number()
3116 return [match.string], next_state, []
3118 def quoted(self, match, context, next_state):
3119 """Match consistent quotes on subsequent lines."""
3120 context.append(match.string)
3121 return context, next_state, []
3123 def text(self, match, context, next_state):
3124 if context:
3125 self.messages.append(
3126 self.reporter.error('Inconsistent literal block quoting.',
3127 line=self.state_machine.abs_line_number()))
3128 self.state_machine.previous_line()
3129 raise EOFError
3132state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
3133 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
3134 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
3135"""Standard set of State classes used to start `RSTStateMachine`."""