# $Id$
# Author: David Goodger <goodger@python.org>
# Copyright: This module has been placed in the public domain.

"""
This is the ``docutils.parsers.rst.states`` module, the core of
the reStructuredText parser. It defines the following:

:Classes:
    - `RSTStateMachine`: reStructuredText parser's entry point.
    - `NestedStateMachine`: recursive StateMachine.
    - `RSTState`: reStructuredText State superclass.
    - `Inliner`: For parsing inline markup.
    - `Body`: Generic classifier of the first line of a block.
    - `SpecializedBody`: Superclass for compound element members.
    - `BulletList`: Second and subsequent bullet_list list_items.
    - `DefinitionList`: Second+ definition_list_items.
    - `EnumeratedList`: Second+ enumerated_list list_items.
    - `FieldList`: Second+ fields.
    - `OptionList`: Second+ option_list_items.
    - `RFC2822List`: Second+ RFC2822-style fields.
    - `ExtensionOptions`: Parses directive option fields.
    - `Explicit`: Second+ explicit markup constructs.
    - `SubstitutionDef`: For embedded directives in substitution definitions.
    - `Text`: Classifier of second line of a text block.
    - `SpecializedText`: Superclass for continuation lines of Text-variants.
    - `Definition`: Second line of potential definition_list_item.
    - `Line`: Second line of overlined section title or transition marker.
    - `Struct`: obsolete, use `types.SimpleNamespace`.

:Exception classes:
    - `MarkupError`
    - `ParserError`
    - `MarkupMismatch`

:Functions:
    - `escape2null()`: Return a string, escape-backslashes converted to nulls.
    - `unescape()`: Return a string, nulls removed or restored to backslashes.

:Attributes:
    - `state_classes`: set of State classes used with `RSTStateMachine`.

Parser Overview
===============

The reStructuredText parser is implemented as a recursive state machine,
examining its input one line at a time. To understand how the parser works,
please first become familiar with the `docutils.statemachine` module. In the
description below, references are made to classes defined in this module;
please see the individual classes for details.

Parsing proceeds as follows:

1. The state machine examines each line of input, checking each of the
   transition patterns of the state `Body`, in order, looking for a match.
   The implicit transitions (blank lines and indentation) are checked before
   any others. The 'text' transition is a catch-all (matches anything).

2. The method associated with the matched transition pattern is called.

   A. Some transition methods are self-contained, appending elements to the
      document tree (`Body.doctest` parses a doctest block). The parser's
      current line index is advanced to the end of the element, and parsing
      continues with step 1.

   B. Other transition methods trigger the creation of a nested state machine,
      whose job is to parse a compound construct ('indent' does a block quote,
      'bullet' does a bullet list, 'overline' does a section [first checking
      for a valid section header], etc.).

      - In the case of lists and explicit markup, a one-off state machine is
        created and run to parse contents of the first item.

      - A new state machine is created and its initial state is set to the
        appropriate specialized state (`BulletList` in the case of the
        'bullet' transition; see `SpecializedBody` for more detail). This
        state machine is run to parse the compound element (or series of
        explicit markup elements), and returns as soon as a non-member element
        is encountered. For example, the `BulletList` state machine ends as
        soon as it encounters an element which is not a list item of that
        bullet list. The optional omission of inter-element blank lines is
        enabled by this nested state machine.

      - The current line index is advanced to the end of the elements parsed,
        and parsing continues with step 1.

   C. The result of the 'text' transition depends on the next line of text.
      The current state is changed to `Text`, under which the second line is
      examined. If the second line is:

      - Indented: The element is a definition list item, and parsing proceeds
        similarly to step 2.B, using the `DefinitionList` state.

      - A line of uniform punctuation characters: The element is a section
        header; again, parsing proceeds as in step 2.B, and `Body` is still
        used.

      - Anything else: The element is a paragraph, which is examined for
        inline markup and appended to the parent element. Processing
        continues with step 1.
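
Usage Sketch
============

The parser is normally driven through `docutils.parsers.rst.Parser` or the
`docutils.core` convenience functions rather than by running
`RSTStateMachine` directly. A minimal sketch (the source string and the
name ``'<sketch>'`` are placeholders)::

    from docutils.frontend import get_default_settings
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    parser = Parser()
    settings = get_default_settings(Parser)
    document = new_document('<sketch>', settings)
    parser.parse('A paragraph with *inline markup*.', document)
    print(document.pformat())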
101"""
102
103from __future__ import annotations
104
105__docformat__ = 'reStructuredText'
106
107import copy
108import re
109from types import FunctionType, MethodType
110from types import SimpleNamespace as Struct
111
112from docutils import nodes, statemachine, utils
113from docutils import ApplicationError, DataError
114from docutils.statemachine import StateMachineWS, StateWS
115from docutils.nodes import fully_normalize_name as normalize_name
116from docutils.nodes import unescape, whitespace_normalize_name
117import docutils.parsers.rst
118from docutils.parsers.rst import directives, languages, tableparser, roles
119from docutils.utils import escape2null, column_width
120from docutils.utils import punctuation_chars, urischemes
121from docutils.utils import split_escaped_whitespace
122from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
123 RomanNumeral)
124
125TYPE_CHECKING = False
126if TYPE_CHECKING:
127 from docutils.statemachine import StringList
128
129
130class MarkupError(DataError): pass
131class UnknownInterpretedRoleError(DataError): pass
132class InterpretedRoleNotImplementedError(DataError): pass
133class ParserError(ApplicationError): pass
134class MarkupMismatch(Exception): pass
135
136
137class RSTStateMachine(StateMachineWS):
138
139 """
140 reStructuredText's master StateMachine.
141
142 The entry point to reStructuredText parsing is the `run()` method.
143 """
144
145 def run(self, input_lines, document, input_offset=0, match_titles=True,
146 inliner=None) -> None:
147 """
148 Parse `input_lines` and modify the `document` node in place.
149
150 Extend `StateMachineWS.run()`: set up parse-global data and
151 run the StateMachine.
152 """
153 self.language = languages.get_language(
154 document.settings.language_code, document.reporter)
155 self.match_titles = match_titles
156 if inliner is None:
157 inliner = Inliner()
158 inliner.init_customizations(document.settings)
159 # A collection of objects to share with nested parsers.
160 # The attributes `reporter`, `section_level`, and
161 # `section_bubble_up_kludge` will be removed in Docutils 2.0
162 self.memo = Struct(document=document,
163 reporter=document.reporter, # ignored
164 language=self.language,
165 title_styles=[],
166 section_level=0, # ignored
167 section_bubble_up_kludge=False, # ignored
168 inliner=inliner)
169 self.document = document
170 self.attach_observer(document.note_source)
171 self.reporter = self.document.reporter
172 self.node = document
173 results = StateMachineWS.run(self, input_lines, input_offset,
174 input_source=document['source'])
175 assert results == [], 'RSTStateMachine.run() results should be empty!'
176 self.node = self.memo = None # remove unneeded references
177
178
179class NestedStateMachine(StateMachineWS):
180 """
181 StateMachine run from within other StateMachine runs, to parse nested
182 document structures.
183 """
184
185 def run(self, input_lines, input_offset, memo, node, match_titles=True):
186 """
187 Parse `input_lines` and populate `node`.
188
189 Use a separate "title style hierarchy" (changed in Docutils 0.23).
190
191 Extend `StateMachineWS.run()`: set up document-wide data.
192 """
193 self.match_titles = match_titles
194 self.memo = copy.copy(memo)
195 self.document = memo.document
196 self.attach_observer(self.document.note_source)
197 self.language = memo.language
198 self.reporter = self.document.reporter
199 self.node = node
200 if match_titles:
201 # Use a separate section title style hierarchy;
202 # ensure all sections in the `input_lines` are treated as
203 # subsections of the current section by blocking lower
204 # section levels with a style that is impossible in rST:
205 self.memo.title_styles = ['x'] * len(node.section_hierarchy())
206 results = StateMachineWS.run(self, input_lines, input_offset)
207 assert results == [], ('NestedStateMachine.run() results should be '
208 'empty!')
209 return results
210
211
212class RSTState(StateWS):
213
214 """
215 reStructuredText State superclass.
216
217 Contains methods used by all State subclasses.
218 """
219
220 nested_sm = NestedStateMachine
221 nested_sm_cache = []
222
223 def __init__(self, state_machine, debug=False) -> None:
224 self.nested_sm_kwargs = {'state_classes': state_classes,
225 'initial_state': 'Body'}
226 StateWS.__init__(self, state_machine, debug)
227
228 def runtime_init(self) -> None:
229 StateWS.runtime_init(self)
230 memo = self.state_machine.memo
231 self.memo = memo
232 self.document = memo.document
233 self.inliner = memo.inliner
234 self.reporter = self.document.reporter
235 self.parent = self.state_machine.node
236 # enable the reporter to determine source and source-line
237 if not hasattr(self.reporter, 'get_source_and_line'):
238 self.reporter.get_source_and_line = self.state_machine.get_source_and_line # noqa:E501
239
240 def goto_line(self, abs_line_offset) -> None:
241 """
242 Jump to input line `abs_line_offset`, ignoring jumps past the end.
243 """
244 try:
245 self.state_machine.goto_line(abs_line_offset)
246 except EOFError:
247 pass
248
249 def no_match(self, context, transitions):
250 """
251 Override `StateWS.no_match` to generate a system message.
252
253 This code should never be run.
254 """
255 self.reporter.severe(
256 'Internal error: no transition pattern match. State: "%s"; '
257 'transitions: %s; context: %s; current line: %r.'
258 % (self.__class__.__name__, transitions, context,
259 self.state_machine.line))
260 return context, None, []
261
262 def bof(self, context):
263 """Called at beginning of file."""
264 return [], []
265
266 def nested_parse(self,
267 block: StringList,
268 input_offset: int,
269 node: nodes.Element,
270 match_titles: bool = False,
271 state_machine_class: StateMachineWS|None = None,
272 state_machine_kwargs: dict|None = None
273 ) -> int:
274 """
275 Parse the input `block` with a nested state-machine rooted at `node`.
276
277 :block:
278 reStructuredText source extract.
279 :input_offset:
280 Line number at start of the block.
281 :node:
282 Base node. All generated nodes will be appended to this node.
283 :match_titles:
284 Allow section titles?
285 A separate section title style hierarchy is used for the nested
286 parsing (all sections are subsections of the current section).
287 The calling code should check whether sections are valid
288 children of the base node and move them or warn otherwise.
289 :state_machine_class:
290 Default: `NestedStateMachine`.
291 :state_machine_kwargs:
292 Keyword arguments for the state-machine instantiation.
293 Default: `self.nested_sm_kwargs`.
294
295 Create a new state-machine instance if required.
296 Return new offset.
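
        A typical call, e.g. from a directive's ``run()`` method (sketch;
        ``self.content`` and ``self.content_offset`` are provided by
        `docutils.parsers.rst.Directive`)::

            container = nodes.Element()
            self.state.nested_parse(self.content, self.content_offset,
                                    container)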
297 """
298 use_default = 0
299 if state_machine_class is None:
300 state_machine_class = self.nested_sm
301 use_default += 1
302 if state_machine_kwargs is None:
303 state_machine_kwargs = self.nested_sm_kwargs
304 use_default += 1
305 state_machine = None
306 if use_default == 2:
307 try:
308 state_machine = self.nested_sm_cache.pop()
309 except IndexError:
310 pass
311 if not state_machine:
312 state_machine = state_machine_class(debug=self.debug,
313 **state_machine_kwargs)
314 # run the statemachine and populate `node`:
315 block_length = len(block)
316 state_machine.run(block, input_offset, memo=self.memo,
317 node=node, match_titles=match_titles)
318 # clean up
319 if use_default == 2:
320 self.nested_sm_cache.append(state_machine)
321 else:
322 state_machine.unlink()
323 new_offset = state_machine.abs_line_offset()
324 # No `block.parent` implies disconnected -- lines aren't in sync:
325 if block.parent and (len(block) - block_length) != 0:
326 # Adjustment for block if modified in nested parse:
327 self.state_machine.next_line(len(block) - block_length)
328 return new_offset
329
330 def nested_list_parse(self, block, input_offset, node, initial_state,
331 blank_finish,
332 blank_finish_state=None,
333 extra_settings={},
334 match_titles=False,
335 state_machine_class=None,
336 state_machine_kwargs=None):
337 """
338 Parse the input `block` with a nested state-machine rooted at `node`.
339
340 Create a new StateMachine rooted at `node` and run it over the
341 input `block` (see also `nested_parse()`).
342 Also keep track of optional intermediate blank lines and the
343 required final one.
344
345 Return new offset and a boolean indicating whether there was a
346 blank final line.
347 """
348 if state_machine_class is None:
349 state_machine_class = self.nested_sm
350 if state_machine_kwargs is None:
351 state_machine_kwargs = self.nested_sm_kwargs.copy()
352 state_machine_kwargs['initial_state'] = initial_state
353 state_machine = state_machine_class(debug=self.debug,
354 **state_machine_kwargs)
355 if blank_finish_state is None:
356 blank_finish_state = initial_state
357 state_machine.states[blank_finish_state].blank_finish = blank_finish
358 for key, value in extra_settings.items():
359 setattr(state_machine.states[initial_state], key, value)
360 state_machine.run(block, input_offset, memo=self.memo,
361 node=node, match_titles=match_titles)
362 blank_finish = state_machine.states[blank_finish_state].blank_finish
363 state_machine.unlink()
364 return state_machine.abs_line_offset(), blank_finish
365
366 def section(self, title, source, style, lineno, messages) -> None:
367 """Check for a valid subsection and create one if it checks out."""
368 if self.check_subsection(source, style, lineno):
369 self.new_subsection(title, lineno, messages)
370
371 def check_subsection(self, source, style, lineno) -> bool:
372 """
373 Check for a valid subsection header. Update section data in `memo`.
374
375 When a new section is reached that isn't a subsection of the current
376 section, set `self.parent` to the new section's parent section
377 (or the root node if the new section is a top-level section).
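
        For example, with ``title_styles == ['=', '-']``, the underline
        style ``'-'`` maps to level 2 and is accepted whenever the current
        section level is at least 1, while a new (third) style maps to
        level 3 and requires a current level of at least 2.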
378 """
379 title_styles = self.memo.title_styles
380 parent_sections = self.parent.section_hierarchy()
381 # current section level: (0 root, 1 section, 2 subsection, ...)
382 oldlevel = len(parent_sections)
383 # new section level:
384 try: # check for existing title style
385 newlevel = title_styles.index(style) + 1
386 except ValueError: # new title style
387 newlevel = len(title_styles) + 1
388 # The new level must not be deeper than an immediate child
389 # of the current level:
390 if newlevel > oldlevel + 1:
391 styles = ' '.join('/'.join(style) for style in title_styles)
392 self.parent += self.reporter.error(
393 'Inconsistent title style:'
394 f' skip from level {oldlevel} to {newlevel}.',
395 nodes.literal_block('', source),
396 nodes.paragraph('', f'Established title styles: {styles}'),
397 line=lineno)
398 return False
399 # Update parent state:
400 if newlevel > len(title_styles):
401 title_styles.append(style)
402 self.memo.section_level = newlevel
403 if newlevel <= oldlevel:
404 # new section is sibling or higher up in the section hierarchy
405 self.parent = parent_sections[newlevel-1].parent
406 return True
407
408 def title_inconsistent(self, sourcetext, lineno):
409 # Ignored. Will be removed in Docutils 2.0.
410 error = self.reporter.error(
411 'Title level inconsistent:', nodes.literal_block('', sourcetext),
412 line=lineno)
413 return error
414
415 def new_subsection(self, title, lineno, messages):
416 """Append new subsection to document tree."""
417 section_node = nodes.section()
418 self.parent += section_node
419 textnodes, title_messages = self.inline_text(title, lineno)
420 titlenode = nodes.title(title, '', *textnodes)
421 name = normalize_name(titlenode.astext())
422 section_node['names'].append(name)
423 section_node += titlenode
424 section_node += messages
425 section_node += title_messages
426 self.document.note_implicit_target(section_node, section_node)
427 # Update state:
428 self.state_machine.node = section_node
429 # Also update the ".parent" attribute in all states.
430 # This is a bit violent, but the state classes copy their .parent from
431 # state_machine.node on creation, so we need to update them. We could
432 # also remove RSTState.parent entirely and replace references to it
433 # with statemachine.node, but that might break code downstream of
434 # docutils.
435 for s in self.state_machine.states.values():
436 s.parent = section_node
437
438 def paragraph(self, lines, lineno):
439 """
440 Return a list (paragraph & messages) & a boolean: literal_block next?
441 """
442 data = '\n'.join(lines).rstrip()
443 if re.search(r'(?<!\\)(\\\\)*::$', data):
444 if len(data) == 2:
445 return [], 1
446 elif data[-3] in ' \n':
447 text = data[:-3].rstrip()
448 else:
449 text = data[:-1]
450 literalnext = 1
451 else:
452 text = data
453 literalnext = 0
454 textnodes, messages = self.inline_text(text, lineno)
455 p = nodes.paragraph(data, '', *textnodes)
456 p.source, p.line = self.state_machine.get_source_and_line(lineno)
457 return [p] + messages, literalnext
458
459 def inline_text(self, text, lineno):
460 """
461 Return 2 lists: nodes (text and inline elements), and system_messages.
462 """
463 nodes, messages = self.inliner.parse(text, lineno,
464 self.memo, self.parent)
465 return nodes, messages
466
467 def unindent_warning(self, node_name):
468 # the actual problem is one line below the current line
469 lineno = self.state_machine.abs_line_number() + 1
470 return self.reporter.warning('%s ends without a blank line; '
471 'unexpected unindent.' % node_name,
472 line=lineno)
473
474
475def build_regexp(definition, compile_patterns=True):
476 """
477 Build, compile and return a regular expression based on `definition`.
478
479 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
480 where "parts" is a list of regular expressions and/or regular
481 expression definitions to be joined into an or-group.
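
    Example (illustrative)::

        build_regexp(('enum', '', '', ['[0-9]+', '[a-z]']))
        # returns a pattern equivalent to re.compile('(?P<enum>[0-9]+|[a-z])')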
482 """
483 name, prefix, suffix, parts = definition
484 part_strings = []
485 for part in parts:
486 if isinstance(part, tuple):
487 part_strings.append(build_regexp(part, None))
488 else:
489 part_strings.append(part)
490 or_group = '|'.join(part_strings)
491 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
492 if compile_patterns:
493 return re.compile(regexp)
494 else:
495 return regexp
496
497
498class Inliner:
499
500 """
501 Parse inline markup; call the `parse()` method.
502 """
503
504 def __init__(self) -> None:
505 self.implicit_dispatch = []
506 """List of (pattern, bound method) tuples, used by
507 `self.implicit_inline`."""
508
509 def init_customizations(self, settings) -> None:
510 # lookahead and look-behind expressions for inline markup rules
511 if getattr(settings, 'character_level_inline_markup', False):
512 start_string_prefix = '(^|(?<!\x00))'
513 end_string_suffix = ''
514 else:
515 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
516 (punctuation_chars.openers,
517 punctuation_chars.delimiters))
518 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
519 (punctuation_chars.closing_delimiters,
520 punctuation_chars.delimiters,
521 punctuation_chars.closers))
522 args = locals().copy()
523 args.update(vars(self.__class__))
524
525 parts = ('initial_inline', start_string_prefix, '',
526 [
527 ('start', '', self.non_whitespace_after, # simple start-strings
528 [r'\*\*', # strong
529 r'\*(?!\*)', # emphasis but not strong
530 r'``', # literal
531 r'_`', # inline internal target
532 r'\|(?!\|)'] # substitution reference
533 ),
534 ('whole', '', end_string_suffix, # whole constructs
535 [ # reference name & end-string
536 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
537 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
538 [r'[0-9]+', # manually numbered
539 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)
540 r'\*', # auto-symbol
541 r'(?P<citationlabel>%s)' % self.simplename, # citation ref
542 ]
543 )
544 ]
545 ),
546 ('backquote', # interpreted text or phrase reference
547 '(?P<role>(:%s:)?)' % self.simplename, # optional role
548 self.non_whitespace_after,
549 ['`(?!`)'] # but not literal
550 )
551 ]
552 )
553 self.start_string_prefix = start_string_prefix
554 self.end_string_suffix = end_string_suffix
555 self.parts = parts
556
557 self.patterns = Struct(
558 initial=build_regexp(parts),
559 emphasis=re.compile(self.non_whitespace_escape_before
560 + r'(\*)' + end_string_suffix),
561 strong=re.compile(self.non_whitespace_escape_before
562 + r'(\*\*)' + end_string_suffix),
563 interpreted_or_phrase_ref=re.compile(
564 r"""
565 %(non_unescaped_whitespace_escape_before)s
566 (
567 `
568 (?P<suffix>
569 (?P<role>:%(simplename)s:)?
570 (?P<refend>__?)?
571 )
572 )
573 %(end_string_suffix)s
574 """ % args, re.VERBOSE),
575 embedded_link=re.compile(
576 r"""
577 (
578 (?:[ \n]+|^) # spaces or beginning of line/string
579 < # open bracket
580 %(non_whitespace_after)s
581 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
582 %(non_whitespace_escape_before)s
583 > # close bracket
584 )
585 $ # end of string
586 """ % args, re.VERBOSE),
587 literal=re.compile(self.non_whitespace_before + '(``)'
588 + end_string_suffix),
589 target=re.compile(self.non_whitespace_escape_before
590 + r'(`)' + end_string_suffix),
591 substitution_ref=re.compile(self.non_whitespace_escape_before
592 + r'(\|_{0,2})'
593 + end_string_suffix),
594 email=re.compile(self.email_pattern % args + '$',
595 re.VERBOSE),
596 uri=re.compile(
597 (r"""
598 %(start_string_prefix)s
599 (?P<whole>
600 (?P<absolute> # absolute URI
601 (?P<scheme> # scheme (http, ftp, mailto)
602 [a-zA-Z][a-zA-Z0-9.+-]*
603 )
604 :
605 (
606 ( # either:
607 (//?)? # hierarchical URI
608 %(uric)s* # URI characters
609 %(uri_end)s # final URI char
610 )
611 ( # optional query
612 \?%(uric)s*
613 %(uri_end)s
614 )?
615 ( # optional fragment
616 \#%(uric)s*
617 %(uri_end)s
618 )?
619 )
620 )
621 | # *OR*
622 (?P<email> # email address
623 """ + self.email_pattern + r"""
624 )
625 )
626 %(end_string_suffix)s
627 """) % args, re.VERBOSE),
628 pep=re.compile(
629 r"""
630 %(start_string_prefix)s
631 (
632 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
633 |
634 (PEP\s+(?P<pepnum2>\d+)) # reference by name
635 )
636 %(end_string_suffix)s""" % args, re.VERBOSE),
637 rfc=re.compile(
638 r"""
639 %(start_string_prefix)s
640 (RFC(-|\s+)?(?P<rfcnum>\d+))
641 %(end_string_suffix)s""" % args, re.VERBOSE))
642
643 self.implicit_dispatch.append((self.patterns.uri,
644 self.standalone_uri))
645 if settings.pep_references:
646 self.implicit_dispatch.append((self.patterns.pep,
647 self.pep_reference))
648 if settings.rfc_references:
649 self.implicit_dispatch.append((self.patterns.rfc,
650 self.rfc_reference))
651
652 def parse(self, text, lineno, memo, parent):
653 # Needs to be refactored for nested inline markup.
654 # Add nested_parse() method?
655 """
656 Return 2 lists: nodes (text and inline elements), and system_messages.
657
658 Using `self.patterns.initial`, a pattern which matches start-strings
659 (emphasis, strong, interpreted, phrase reference, literal,
660 substitution reference, and inline target) and complete constructs
661 (simple reference, footnote reference), search for a candidate. When
662 one is found, check for validity (e.g., not a quoted '*' character).
663 If valid, search for the corresponding end string if applicable, and
664 check it for validity. If not found or invalid, generate a warning
665 and ignore the start-string. Implicit inline markup (e.g. standalone
666 URIs) is found last.
667
668 :text: source string
669 :lineno: absolute line number, cf. `statemachine.get_source_and_line()`
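
        A standalone call might look like this (sketch; ``memo`` and
        ``parent`` are normally supplied by the calling `RSTState`)::

            nodes_, messages = inliner.parse('*emphasis* and a ref_',
                                             lineno, memo, parent)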
670 """
671 self.document = memo.document
672 self.language = memo.language
673 self.reporter = self.document.reporter
674 self.parent = parent
675 pattern_search = self.patterns.initial.search
676 dispatch = self.dispatch
677 remaining = escape2null(text)
678 processed = []
679 unprocessed = []
680 messages = []
681 while remaining:
682 match = pattern_search(remaining)
683 if match:
684 groups = match.groupdict()
685 method = dispatch[groups['start'] or groups['backquote']
686 or groups['refend'] or groups['fnend']]
687 before, inlines, remaining, sysmessages = method(self, match,
688 lineno)
689 unprocessed.append(before)
690 messages += sysmessages
691 if inlines:
692 processed += self.implicit_inline(''.join(unprocessed),
693 lineno)
694 processed += inlines
695 unprocessed = []
696 else:
697 break
698 remaining = ''.join(unprocessed) + remaining
699 if remaining:
700 processed += self.implicit_inline(remaining, lineno)
701 return processed, messages
702
703 # Inline object recognition
704 # -------------------------
705 # See also init_customizations().
706 non_whitespace_before = r'(?<!\s)'
707 non_whitespace_escape_before = r'(?<![\s\x00])'
708 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
709 non_whitespace_after = r'(?!\s)'
710 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
711 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
712 # Valid URI characters (see RFC 2396 & RFC 2732);
713 # final \x00 allows backslash escapes in URIs:
714 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
715 # Delimiter indicating the end of a URI (not part of the URI):
716 uri_end_delim = r"""[>]"""
717 # Last URI character; same as uric but no punctuation:
718 urilast = r"""[_~*/=+a-zA-Z0-9]"""
719 # End of a URI (either 'urilast' or 'uric followed by a
720 # uri_end_delim'):
721 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
722 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
723 email_pattern = r"""
724 %(emailc)s+(?:\.%(emailc)s+)* # name
725 (?<!\x00)@ # at
726 %(emailc)s+(?:\.%(emailc)s*)* # host
727 %(uri_end)s # final URI char
728 """
729
730 def quoted_start(self, match):
731 """Test if inline markup start-string is 'quoted'.
732
733 'Quoted' in this context means the start-string is enclosed in a pair
734 of matching opening/closing delimiters (not necessarily quotes)
735 or at the end of the match.
736 """
737 string = match.string
738 start = match.start()
739 if start == 0: # start-string at beginning of text
740 return False
741 prestart = string[start - 1]
742 try:
743 poststart = string[match.end()]
744 except IndexError: # start-string at end of text
745 return True # not "quoted" but no markup start-string either
746 return punctuation_chars.match_chars(prestart, poststart)
747
748 def inline_obj(self, match, lineno, end_pattern, nodeclass,
749 restore_backslashes=False):
750 string = match.string
751 matchstart = match.start('start')
752 matchend = match.end('start')
753 if self.quoted_start(match):
754 return string[:matchend], [], string[matchend:], [], ''
755 endmatch = end_pattern.search(string[matchend:])
756 if endmatch and endmatch.start(1): # 1 or more chars
757 text = endmatch.string[:endmatch.start(1)]
758 if restore_backslashes:
759 text = unescape(text, True)
760 textend = matchend + endmatch.end(1)
761 rawsource = unescape(string[matchstart:textend], True)
762 node = nodeclass(rawsource, text)
763 return (string[:matchstart], [node],
764 string[textend:], [], endmatch.group(1))
765 msg = self.reporter.warning(
766 'Inline %s start-string without end-string.'
767 % nodeclass.__name__, line=lineno)
768 text = unescape(string[matchstart:matchend], True)
769 prb = self.problematic(text, text, msg)
770 return string[:matchstart], [prb], string[matchend:], [msg], ''
771
772 def problematic(self, text, rawsource, message):
773 msgid = self.document.set_id(message, self.parent)
774 problematic = nodes.problematic(rawsource, text, refid=msgid)
775 prbid = self.document.set_id(problematic)
776 message.add_backref(prbid)
777 return problematic
778
779 def emphasis(self, match, lineno):
780 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
781 match, lineno, self.patterns.emphasis, nodes.emphasis)
782 return before, inlines, remaining, sysmessages
783
784 def strong(self, match, lineno):
785 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
786 match, lineno, self.patterns.strong, nodes.strong)
787 return before, inlines, remaining, sysmessages
788
789 def interpreted_or_phrase_ref(self, match, lineno):
790 end_pattern = self.patterns.interpreted_or_phrase_ref
791 string = match.string
792 matchstart = match.start('backquote')
793 matchend = match.end('backquote')
794 rolestart = match.start('role')
795 role = match.group('role')
796 position = ''
797 if role:
798 role = role[1:-1]
799 position = 'prefix'
800 elif self.quoted_start(match):
801 return string[:matchend], [], string[matchend:], []
802 endmatch = end_pattern.search(string[matchend:])
803 if endmatch and endmatch.start(1): # 1 or more chars
804 textend = matchend + endmatch.end()
805 if endmatch.group('role'):
806 if role:
807 msg = self.reporter.warning(
808 'Multiple roles in interpreted text (both '
809 'prefix and suffix present; only one allowed).',
810 line=lineno)
811 text = unescape(string[rolestart:textend], True)
812 prb = self.problematic(text, text, msg)
813 return string[:rolestart], [prb], string[textend:], [msg]
814 role = endmatch.group('suffix')[1:-1]
815 position = 'suffix'
816 escaped = endmatch.string[:endmatch.start(1)]
817 rawsource = unescape(string[matchstart:textend], True)
818 if rawsource[-1:] == '_':
819 if role:
820 msg = self.reporter.warning(
821 'Mismatch: both interpreted text role %s and '
822 'reference suffix.' % position, line=lineno)
823 text = unescape(string[rolestart:textend], True)
824 prb = self.problematic(text, text, msg)
825 return string[:rolestart], [prb], string[textend:], [msg]
826 return self.phrase_ref(string[:matchstart], string[textend:],
827 rawsource, escaped)
828 else:
829 rawsource = unescape(string[rolestart:textend], True)
830 nodelist, messages = self.interpreted(rawsource, escaped, role,
831 lineno)
832 return (string[:rolestart], nodelist,
833 string[textend:], messages)
834 msg = self.reporter.warning(
835 'Inline interpreted text or phrase reference start-string '
836 'without end-string.', line=lineno)
837 text = unescape(string[matchstart:matchend], True)
838 prb = self.problematic(text, text, msg)
839 return string[:matchstart], [prb], string[matchend:], [msg]
840
841 def phrase_ref(self, before, after, rawsource, escaped, text=None):
842 # `text` is ignored (since 0.16)
843 match = self.patterns.embedded_link.search(escaped)
844 if match: # embedded <URI> or <alias_>
845 text = escaped[:match.start(0)]
846 unescaped = unescape(text)
847 rawtext = unescape(text, True)
848 aliastext = match.group(2)
849 rawaliastext = unescape(aliastext, True)
850 underscore_escaped = rawaliastext.endswith(r'\_')
851 if (aliastext.endswith('_')
852 and not (underscore_escaped
853 or self.patterns.uri.match(aliastext))):
854 aliastype = 'name'
855 alias = normalize_name(unescape(aliastext[:-1]))
856 target = nodes.target(match.group(1), refname=alias)
857 target.indirect_reference_name = whitespace_normalize_name(
858 unescape(aliastext[:-1]))
859 else:
860 aliastype = 'uri'
861 # remove unescaped whitespace
862 alias_parts = split_escaped_whitespace(match.group(2))
863 alias = ' '.join(''.join(part.split())
864 for part in alias_parts)
865 alias = self.adjust_uri(unescape(alias))
866 if alias.endswith(r'\_'):
867 alias = alias[:-2] + '_'
868 target = nodes.target(match.group(1), refuri=alias)
869 target.referenced = 1
870 if not aliastext:
871 raise ApplicationError('problem with embedded link: %r'
872 % aliastext)
873 if not text:
874 text = alias
875 unescaped = unescape(text)
876 rawtext = rawaliastext
877 else:
878 text = escaped
879 unescaped = unescape(text)
880 target = None
881 rawtext = unescape(escaped, True)
882
883 refname = normalize_name(unescaped)
884 reference = nodes.reference(rawsource, text,
885 name=whitespace_normalize_name(unescaped))
886 reference[0].rawsource = rawtext
887
888 node_list = [reference]
889
890 if rawsource[-2:] == '__':
891 if target and (aliastype == 'name'):
892 reference['refname'] = alias
893 self.document.note_refname(reference)
894 # self.document.note_indirect_target(target) # required?
895 elif target and (aliastype == 'uri'):
896 reference['refuri'] = alias
897 else:
898 reference['anonymous'] = True
899 else:
900 if target:
901 target['names'].append(refname)
902 if aliastype == 'name':
903 reference['refname'] = alias
904 self.document.note_indirect_target(target)
905 self.document.note_refname(reference)
906 else:
907 reference['refuri'] = alias
908 # target.note_referenced_by(name=refname)
909 self.document.note_implicit_target(target, self.parent)
910 node_list.append(target)
911 else:
912 reference['refname'] = refname
913 self.document.note_refname(reference)
914 return before, node_list, after, []
915
916 def adjust_uri(self, uri):
917 match = self.patterns.email.match(uri)
918 if match:
919 return 'mailto:' + uri
920 else:
921 return uri
922
923 def interpreted(self, rawsource, text, role, lineno):
924 role_fn, messages = roles.role(role, self.language, lineno,
925 self.reporter)
926 if role_fn:
927 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
928 return nodes, messages + messages2
929 else:
930 msg = self.reporter.error(
931 'Unknown interpreted text role "%s".' % role,
932 line=lineno)
933 return ([self.problematic(rawsource, rawsource, msg)],
934 messages + [msg])
935
936 def literal(self, match, lineno):
937 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
938 match, lineno, self.patterns.literal, nodes.literal,
939 restore_backslashes=True)
940 return before, inlines, remaining, sysmessages
941
942 def inline_internal_target(self, match, lineno):
943 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
944 match, lineno, self.patterns.target, nodes.target)
945 if inlines and isinstance(inlines[0], nodes.target):
946 assert len(inlines) == 1
947 target = inlines[0]
948 name = normalize_name(target.astext())
949 target['names'].append(name)
950 self.document.note_explicit_target(target, self.parent)
951 return before, inlines, remaining, sysmessages
952
953 def substitution_reference(self, match, lineno):
954 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
955 match, lineno, self.patterns.substitution_ref,
956 nodes.substitution_reference)
957 if len(inlines) == 1:
958 subref_node = inlines[0]
959 if isinstance(subref_node, nodes.substitution_reference):
960 subref_text = subref_node.astext()
961 self.document.note_substitution_ref(subref_node, subref_text)
962 if endstring[-1:] == '_':
963 reference_node = nodes.reference(
964 '|%s%s' % (subref_text, endstring), '')
965 if endstring[-2:] == '__':
966 reference_node['anonymous'] = True
967 else:
968 reference_node['refname'] = normalize_name(subref_text)
969 self.document.note_refname(reference_node)
970 reference_node += subref_node
971 inlines = [reference_node]
972 return before, inlines, remaining, sysmessages
973
974 def footnote_reference(self, match, lineno):
975 """
976 Handles `nodes.footnote_reference` and `nodes.citation_reference`
977 elements.
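
        Matched source forms include ``[1]_``, ``[#]_``, ``[#label]_``,
        ``[*]_``, and ``[CIT2002]_`` (a citation reference).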
978 """
979 label = match.group('footnotelabel')
980 refname = normalize_name(label)
981 string = match.string
982 before = string[:match.start('whole')]
983 remaining = string[match.end('whole'):]
984 if match.group('citationlabel'):
985 refnode = nodes.citation_reference('[%s]_' % label,
986 refname=refname)
987 refnode += nodes.Text(label)
988 self.document.note_citation_ref(refnode)
989 else:
990 refnode = nodes.footnote_reference('[%s]_' % label)
991 if refname[0] == '#':
992 refname = refname[1:]
993 refnode['auto'] = 1
994 self.document.note_autofootnote_ref(refnode)
995 elif refname == '*':
996 refname = ''
997 refnode['auto'] = '*'
998 self.document.note_symbol_footnote_ref(
999 refnode)
1000 else:
1001 refnode += nodes.Text(label)
1002 if refname:
1003 refnode['refname'] = refname
1004 self.document.note_footnote_ref(refnode)
1005 if utils.get_trim_footnote_ref_space(self.document.settings):
1006 before = before.rstrip()
1007 return before, [refnode], remaining, []
1008
1009 def reference(self, match, lineno, anonymous=False):
1010 referencename = match.group('refname')
1011 refname = normalize_name(referencename)
1012 referencenode = nodes.reference(
1013 referencename + match.group('refend'), referencename,
1014 name=whitespace_normalize_name(referencename))
1015 referencenode[0].rawsource = referencename
1016 if anonymous:
1017 referencenode['anonymous'] = True
1018 else:
1019 referencenode['refname'] = refname
1020 self.document.note_refname(referencenode)
1021 string = match.string
1022 matchstart = match.start('whole')
1023 matchend = match.end('whole')
1024 return string[:matchstart], [referencenode], string[matchend:], []
1025
1026 def anonymous_reference(self, match, lineno):
1027 return self.reference(match, lineno, anonymous=True)
1028
1029 def standalone_uri(self, match, lineno):
1030 if (not match.group('scheme')
1031 or match.group('scheme').lower() in urischemes.schemes):
1032 if match.group('email'):
1033 addscheme = 'mailto:'
1034 else:
1035 addscheme = ''
1036 text = match.group('whole')
1037 refuri = addscheme + unescape(text)
1038 reference = nodes.reference(unescape(text, True), text,
1039 refuri=refuri)
1040 return [reference]
1041 else: # not a valid scheme
1042 raise MarkupMismatch
1043
1044 def pep_reference(self, match, lineno):
1045 text = match.group(0)
1046 if text.startswith('pep-'):
1047 pepnum = int(unescape(match.group('pepnum1')))
1048 elif text.startswith('PEP'):
1049 pepnum = int(unescape(match.group('pepnum2')))
1050 else:
1051 raise MarkupMismatch
1052 ref = (self.document.settings.pep_base_url
1053 + self.document.settings.pep_file_url_template % pepnum)
1054 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1055
1056 rfc_url = 'rfc%d.html'
1057
1058 def rfc_reference(self, match, lineno):
1059 text = match.group(0)
1060 if text.startswith('RFC'):
1061 rfcnum = int(unescape(match.group('rfcnum')))
1062 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
1063 else:
1064 raise MarkupMismatch
1065 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1066
1067 def implicit_inline(self, text, lineno):
1068 """
1069 Check each of the patterns in `self.implicit_dispatch` for a match,
1070 and dispatch to the stored method for the pattern. Recursively check
1071 the text before and after the match. Return a list of `nodes.Text`
1072 and inline element nodes.
1073 """
1074 if not text:
1075 return []
1076 for pattern, method in self.implicit_dispatch:
1077 match = pattern.search(text)
1078 if match:
1079 try:
1080 # Must recurse on strings before *and* after the match;
1081 # there may be multiple patterns.
1082 return (self.implicit_inline(text[:match.start()], lineno)
1083 + method(match, lineno)
1084 + self.implicit_inline(text[match.end():], lineno))
1085 except MarkupMismatch:
1086 pass
1087 return [nodes.Text(text)]
1088
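    # Map each inline markup start-string (or reference/footnote end-string)
    # matched by `patterns.initial` to its handler method (see `parse()`).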
1089 dispatch = {'*': emphasis,
1090 '**': strong,
1091 '`': interpreted_or_phrase_ref,
1092 '``': literal,
1093 '_`': inline_internal_target,
1094 ']_': footnote_reference,
1095 '|': substitution_reference,
1096 '_': reference,
1097 '__': anonymous_reference}
1098
1099
1100def _loweralpha_to_int(s, _zero=(ord('a')-1)):
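    # Ordinal value of a single letter: 'a' -> 1, 'b' -> 2, ..., 'z' -> 26.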
1101 return ord(s) - _zero
1102
1103
1104def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1105 return ord(s) - _zero
1106
1107
1108class Body(RSTState):
1109
1110 """
1111 Generic classifier of the first line of a block.
1112 """
1113
1114 double_width_pad_char = tableparser.TableParser.double_width_pad_char
1115 """Padding character for East Asian double-width text."""
1116
1117 enum = Struct()
1118 """Enumerated list parsing information."""
1119
1120 enum.formatinfo = {
1121 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1122 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1123 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1124 enum.formats = enum.formatinfo.keys()
1125 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1126 'lowerroman', 'upperroman'] # ORDERED!
1127 enum.sequencepats = {'arabic': '[0-9]+',
1128 'loweralpha': '[a-z]',
1129 'upperalpha': '[A-Z]',
1130 'lowerroman': '[ivxlcdm]+',
1131 'upperroman': '[IVXLCDM]+'}
1132 enum.converters = {'arabic': int,
1133 'loweralpha': _loweralpha_to_int,
1134 'upperalpha': _upperalpha_to_int,
1135 'lowerroman': RomanNumeral.from_string,
1136 'upperroman': RomanNumeral.from_string}
1137
1138 enum.sequenceregexps = {}
1139 for sequence in enum.sequences:
1140 enum.sequenceregexps[sequence] = re.compile(
1141 enum.sequencepats[sequence] + '$')
1142
1143 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1144 """Matches the top (& bottom) of a full table)."""
1145
1146 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1147 """Matches the top of a simple table."""
1148
1149 simple_table_border_pat = re.compile('=+[ =]*$')
1150 """Matches the bottom & header bottom of a simple table."""
1151
1152 pats = {}
1153 """Fragments of patterns used by transitions."""
1154
1155 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1156 pats['alpha'] = '[a-zA-Z]'
1157 pats['alphanum'] = '[a-zA-Z0-9]'
1158 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1159 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1160 '|%(upperroman)s|#)' % enum.sequencepats)
1161 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1162 # @@@ Loosen up the pattern? Allow Unicode?
1163 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1164 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1165 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1166 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1167
1168 for format in enum.formats:
1169 pats[format] = '(?P<%s>%s%s%s)' % (
1170 format, re.escape(enum.formatinfo[format].prefix),
1171 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1172
1173 patterns = {
1174 'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
1175 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1176 'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
1177 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1178 'doctest': r'>>>( +|$)',
1179 'line_block': r'\|( +|$)',
1180 'grid_table_top': grid_table_top_pat,
1181 'simple_table_top': simple_table_top_pat,
1182 'explicit_markup': r'\.\.( +|$)',
1183 'anonymous': r'__( +|$)',
1184 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1185 'text': r''}
1186 initial_transitions = (
1187 'bullet',
1188 'enumerator',
1189 'field_marker',
1190 'option_marker',
1191 'doctest',
1192 'line_block',
1193 'grid_table_top',
1194 'simple_table_top',
1195 'explicit_markup',
1196 'anonymous',
1197 'line',
1198 'text')
1199
1200 def indent(self, match, context, next_state):
1201 """Block quote."""
1202 (indented, indent, line_offset, blank_finish
1203 ) = self.state_machine.get_indented()
1204 elements = self.block_quote(indented, line_offset)
1205 self.parent += elements
1206 if not blank_finish:
1207 self.parent += self.unindent_warning('Block quote')
1208 return context, next_state, []
1209
1210 def block_quote(self, indented, line_offset):
1211 elements = []
1212 while indented:
1213 blockquote = nodes.block_quote(rawsource='\n'.join(indented))
1214 (blockquote.source, blockquote.line
1215 ) = self.state_machine.get_source_and_line(line_offset+1)
1216 (blockquote_lines,
1217 attribution_lines,
1218 attribution_offset,
1219 indented,
1220 new_line_offset) = self.split_attribution(indented, line_offset)
1221 self.nested_parse(blockquote_lines, line_offset, blockquote)
1222 elements.append(blockquote)
1223 if attribution_lines:
1224 attribution, messages = self.parse_attribution(
1225 attribution_lines, line_offset+attribution_offset)
1226 blockquote += attribution
1227 elements += messages
1228 line_offset = new_line_offset
1229 while indented and not indented[0]:
1230 indented = indented[1:]
1231 line_offset += 1
1232 return elements
1233
1234 # U+2014 is an em-dash:
1235 attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
1236
1237 def split_attribution(self, indented, line_offset):
1238 """
1239 Check for a block quote attribution and split it off:
1240
1241 * First line after a blank line must begin with a dash ("--", "---",
1242 em-dash; matches `self.attribution_pattern`).
1243 * Every line after that must have consistent indentation.
1244 * Attributions must be preceded by block quote content.
1245
1246 Return a tuple of: (block quote content lines, attribution lines,
1247 attribution offset, remaining indented lines, remaining lines offset).
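
        Example source (block quote content; the last line is split off as
        the attribution)::

            This is the quoted text.

            -- An Attribution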
1248 """
1249 blank = None
1250 nonblank_seen = False
1251 for i in range(len(indented)):
1252 line = indented[i].rstrip()
1253 if line:
1254 if nonblank_seen and blank == i - 1: # last line blank
1255 match = self.attribution_pattern.match(line)
1256 if match:
1257 attribution_end, indent = self.check_attribution(
1258 indented, i)
1259 if attribution_end:
1260 a_lines = indented[i:attribution_end]
1261 a_lines.trim_left(match.end(), end=1)
1262 a_lines.trim_left(indent, start=1)
1263 return (indented[:i], a_lines,
1264 i, indented[attribution_end:],
1265 line_offset + attribution_end)
1266 nonblank_seen = True
1267 else:
1268 blank = i
1269 else:
1270 return indented, None, None, None, None
1271
1272 def check_attribution(self, indented, attribution_start):
1273 """
1274 Check attribution shape.
1275 Return the index past the end of the attribution, and the indent.
1276 """
1277 indent = None
1278 i = attribution_start + 1
1279 for i in range(attribution_start + 1, len(indented)):
1280 line = indented[i].rstrip()
1281 if not line:
1282 break
1283 if indent is None:
1284 indent = len(line) - len(line.lstrip())
1285 elif len(line) - len(line.lstrip()) != indent:
1286 return None, None # bad shape; not an attribution
1287 else:
1288 # return index of line after last attribution line:
1289 i += 1
1290 return i, (indent or 0)
1291
1292 def parse_attribution(self, indented, line_offset):
1293 text = '\n'.join(indented).rstrip()
1294 lineno = 1 + line_offset # line_offset is zero-based
1295 textnodes, messages = self.inline_text(text, lineno)
1296 node = nodes.attribution(text, '', *textnodes)
1297 node.source, node.line = self.state_machine.get_source_and_line(lineno)
1298 return node, messages
1299
1300 def bullet(self, match, context, next_state):
1301 """Bullet list item."""
1302 ul = nodes.bullet_list()
1303 ul.source, ul.line = self.state_machine.get_source_and_line()
1304 self.parent += ul
1305 ul['bullet'] = match.string[0]
1306 i, blank_finish = self.list_item(match.end())
1307 ul += i
1308 offset = self.state_machine.line_offset + 1 # next line
1309 new_line_offset, blank_finish = self.nested_list_parse(
1310 self.state_machine.input_lines[offset:],
1311 input_offset=self.state_machine.abs_line_offset() + 1,
1312 node=ul, initial_state='BulletList',
1313 blank_finish=blank_finish)
1314 self.goto_line(new_line_offset)
1315 if not blank_finish:
1316 self.parent += self.unindent_warning('Bullet list')
1317 return [], next_state, []
1318
1319 def list_item(self, indent):
1320 src, srcline = self.state_machine.get_source_and_line()
1321 if self.state_machine.line[indent:]:
1322 indented, line_offset, blank_finish = (
1323 self.state_machine.get_known_indented(indent))
1324 else:
1325 indented, indent, line_offset, blank_finish = (
1326 self.state_machine.get_first_known_indented(indent))
1327 listitem = nodes.list_item('\n'.join(indented))
1328 listitem.source, listitem.line = src, srcline
1329 if indented:
1330 self.nested_parse(indented, input_offset=line_offset,
1331 node=listitem)
1332 return listitem, blank_finish
1333
1334 def enumerator(self, match, context, next_state):
1335 """Enumerated List Item"""
1336 format, sequence, text, ordinal = self.parse_enumerator(match)
1337 if not self.is_enumerated_list_item(ordinal, sequence, format):
1338 raise statemachine.TransitionCorrection('text')
1339 enumlist = nodes.enumerated_list()
1340 (enumlist.source,
1341 enumlist.line) = self.state_machine.get_source_and_line()
1342 self.parent += enumlist
1343 if sequence == '#':
1344 enumlist['enumtype'] = 'arabic'
1345 else:
1346 enumlist['enumtype'] = sequence
1347 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1348 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1349 if ordinal != 1:
1350 enumlist['start'] = ordinal
1351 msg = self.reporter.info(
1352 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1353 % (text, ordinal), base_node=enumlist)
1354 self.parent += msg
1355 listitem, blank_finish = self.list_item(match.end())
1356 enumlist += listitem
1357 offset = self.state_machine.line_offset + 1 # next line
1358 newline_offset, blank_finish = self.nested_list_parse(
1359 self.state_machine.input_lines[offset:],
1360 input_offset=self.state_machine.abs_line_offset() + 1,
1361 node=enumlist, initial_state='EnumeratedList',
1362 blank_finish=blank_finish,
1363 extra_settings={'lastordinal': ordinal,
1364 'format': format,
1365 'auto': sequence == '#'})
1366 self.goto_line(newline_offset)
1367 if not blank_finish:
1368 self.parent += self.unindent_warning('Enumerated list')
1369 return [], next_state, []
1370
1371 def parse_enumerator(self, match, expected_sequence=None):
1372 """
1373 Analyze an enumerator and return the results.
1374
1375 :Return:
1376 - the enumerator format ('period', 'parens', or 'rparen'),
1377 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1378 - the text of the enumerator, stripped of formatting, and
1379 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1380 ``None`` is returned for invalid enumerator text).
1381
1382 The enumerator format has already been determined by the regular
1383 expression match. If `expected_sequence` is given, that sequence is
1384 tried first. If not, we check for Roman numeral 1. This way,
1385 single-character Roman numerals (which are also alphabetical) can be
1386 matched. If no sequence has been matched, all sequences are checked in
1387 order.
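
        For example, the enumerator ``(ii)`` yields
        ``('parens', 'lowerroman', 'ii', 2)``.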
1388 """
1389 groupdict = match.groupdict()
1390 sequence = ''
1391 for format in self.enum.formats:
1392 if groupdict[format]: # was this the format matched?
1393 break # yes; keep `format`
1394 else: # shouldn't happen
1395 raise ParserError('enumerator format not matched')
1396 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1397 : self.enum.formatinfo[format].end]
1398 if text == '#':
1399 sequence = '#'
1400 elif expected_sequence:
1401 try:
1402 if self.enum.sequenceregexps[expected_sequence].match(text):
1403 sequence = expected_sequence
1404 except KeyError: # shouldn't happen
1405 raise ParserError('unknown enumerator sequence: %s'
1406 % sequence)
1407 elif text == 'i':
1408 sequence = 'lowerroman'
1409 elif text == 'I':
1410 sequence = 'upperroman'
1411 if not sequence:
1412 for sequence in self.enum.sequences:
1413 if self.enum.sequenceregexps[sequence].match(text):
1414 break
1415 else: # shouldn't happen
1416 raise ParserError('enumerator sequence not matched')
1417 if sequence == '#':
1418 ordinal = 1
1419 else:
1420 try:
1421 ordinal = int(self.enum.converters[sequence](text))
1422 except InvalidRomanNumeralError:
1423 ordinal = None
1424 return format, sequence, text, ordinal
1425
1426 def is_enumerated_list_item(self, ordinal, sequence, format):
1427 """
1428 Check validity based on the ordinal value and the second line.
1429
1430 Return true if the ordinal is valid and the second line is blank,
1431 indented, or starts with the next enumerator or an auto-enumerator.
1432 """
1433 if ordinal is None:
1434 return None
1435 try:
1436 next_line = self.state_machine.next_line()
1437 except EOFError: # end of input lines
1438 self.state_machine.previous_line()
1439 return 1
1440 else:
1441 self.state_machine.previous_line()
1442 if not next_line[:1].strip(): # blank or indented
1443 return 1
1444 result = self.make_enumerator(ordinal + 1, sequence, format)
1445 if result:
1446 next_enumerator, auto_enumerator = result
1447 try:
1448 if next_line.startswith((next_enumerator, auto_enumerator)):
1449 return 1
1450 except TypeError:
1451 pass
1452 return None
1453
1454 def make_enumerator(self, ordinal, sequence, format):
1455 """
1456 Construct and return the next enumerated list item marker, and an
1457 auto-enumerator ("#" instead of the regular enumerator).
1458
1459 Return ``None`` for invalid (out of range) ordinals.
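
        For example, ``make_enumerator(3, 'loweralpha', 'rparen')`` returns
        ``('c) ', '#) ')``.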
1460 """
1461 if sequence == '#':
1462 enumerator = '#'
1463 elif sequence == 'arabic':
1464 enumerator = str(ordinal)
1465 else:
1466 if sequence.endswith('alpha'):
1467 if ordinal > 26:
1468 return None
1469 enumerator = chr(ordinal + ord('a') - 1)
1470 elif sequence.endswith('roman'):
1471 try:
1472 enumerator = RomanNumeral(ordinal).to_uppercase()
1473 except TypeError:
1474 return None
1475 else: # shouldn't happen
1476 raise ParserError('unknown enumerator sequence: "%s"'
1477 % sequence)
1478 if sequence.startswith('lower'):
1479 enumerator = enumerator.lower()
1480 elif sequence.startswith('upper'):
1481 enumerator = enumerator.upper()
1482 else: # shouldn't happen
1483 raise ParserError('unknown enumerator sequence: "%s"'
1484 % sequence)
1485 formatinfo = self.enum.formatinfo[format]
1486 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1487 + ' ')
1488 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1489 return next_enumerator, auto_enumerator
1490
1491 def field_marker(self, match, context, next_state):
1492 """Field list item."""
1493 field_list = nodes.field_list()
1494 self.parent += field_list
1495 field, blank_finish = self.field(match)
1496 field_list += field
1497 offset = self.state_machine.line_offset + 1 # next line
1498 newline_offset, blank_finish = self.nested_list_parse(
1499 self.state_machine.input_lines[offset:],
1500 input_offset=self.state_machine.abs_line_offset() + 1,
1501 node=field_list, initial_state='FieldList',
1502 blank_finish=blank_finish)
1503 self.goto_line(newline_offset)
1504 if not blank_finish:
1505 self.parent += self.unindent_warning('Field list')
1506 return [], next_state, []
1507
1508 def field(self, match):
1509 name = self.parse_field_marker(match)
1510 src, srcline = self.state_machine.get_source_and_line()
1511 lineno = self.state_machine.abs_line_number()
1512 (indented, indent, line_offset, blank_finish
1513 ) = self.state_machine.get_first_known_indented(match.end())
1514 field_node = nodes.field()
1515 field_node.source = src
1516 field_node.line = srcline
1517 name_nodes, name_messages = self.inline_text(name, lineno)
1518 field_node += nodes.field_name(name, '', *name_nodes)
1519 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1520 field_node += field_body
1521 if indented:
1522 self.parse_field_body(indented, line_offset, field_body)
1523 return field_node, blank_finish
1524
1525 def parse_field_marker(self, match):
1526 """Extract & return field name from a field marker match."""
1527 field = match.group()[1:] # strip off leading ':'
1528 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1529 return field
1530
1531 def parse_field_body(self, indented, offset, node) -> None:
1532 self.nested_parse(indented, input_offset=offset, node=node)
1533
1534 def option_marker(self, match, context, next_state):
1535 """Option list item."""
1536 optionlist = nodes.option_list()
1537 (optionlist.source, optionlist.line
1538 ) = self.state_machine.get_source_and_line()
1539 try:
1540 listitem, blank_finish = self.option_list_item(match)
1541 except MarkupError as error:
1542 # This shouldn't happen; pattern won't match.
1543 msg = self.reporter.error('Invalid option list marker: %s'
1544 % error)
1545 self.parent += msg
1546 (indented, indent, line_offset, blank_finish
1547 ) = self.state_machine.get_first_known_indented(match.end())
1548 elements = self.block_quote(indented, line_offset)
1549 self.parent += elements
1550 if not blank_finish:
1551 self.parent += self.unindent_warning('Option list')
1552 return [], next_state, []
1553 self.parent += optionlist
1554 optionlist += listitem
1555 offset = self.state_machine.line_offset + 1 # next line
1556 newline_offset, blank_finish = self.nested_list_parse(
1557 self.state_machine.input_lines[offset:],
1558 input_offset=self.state_machine.abs_line_offset() + 1,
1559 node=optionlist, initial_state='OptionList',
1560 blank_finish=blank_finish)
1561 self.goto_line(newline_offset)
1562 if not blank_finish:
1563 self.parent += self.unindent_warning('Option list')
1564 return [], next_state, []
1565
1566 def option_list_item(self, match):
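        """
        Parse one option list item; return (option_list_item node,
        "blank finish").
        """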
1567 offset = self.state_machine.abs_line_offset()
1568 options = self.parse_option_marker(match)
1569 (indented, indent, line_offset, blank_finish
1570 ) = self.state_machine.get_first_known_indented(match.end())
1571 if not indented: # not an option list item
1572 self.goto_line(offset)
1573 raise statemachine.TransitionCorrection('text')
1574 option_group = nodes.option_group('', *options)
1575 description = nodes.description('\n'.join(indented))
1576 option_list_item = nodes.option_list_item('', option_group,
1577 description)
1578 if indented:
1579 self.nested_parse(indented, input_offset=line_offset,
1580 node=description)
1581 return option_list_item, blank_finish
1582
1583 def parse_option_marker(self, match):
1584 """
        Return a list of `nodes.option` and `nodes.option_argument` objects,
1586 parsed from an option marker match.
1587
1588 :Exception: `MarkupError` for invalid option markers.
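
        For example, a marker such as ``-o FILE, --output=<FILE>`` would
        yield two `option` nodes: the first with an `option_string` "-o"
        and an `option_argument` "FILE" (delimiter " "), the second with
        "--output" and "<FILE>" (delimiter "=").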
1589 """
1590 optlist = []
1591 # split at ", ", except inside < > (complex arguments)
1592 optionstrings = re.split(r', (?![^<]*>)', match.group().rstrip())
1593 for optionstring in optionstrings:
1594 tokens = optionstring.split()
1595 delimiter = ' '
1596 firstopt = tokens[0].split('=', 1)
1597 if len(firstopt) > 1:
1598 # "--opt=value" form
1599 tokens[:1] = firstopt
1600 delimiter = '='
1601 elif (len(tokens[0]) > 2
1602 and ((tokens[0].startswith('-')
1603 and not tokens[0].startswith('--'))
1604 or tokens[0].startswith('+'))):
1605 # "-ovalue" form
1606 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1607 delimiter = ''
1608 if len(tokens) > 1 and (tokens[1].startswith('<')
1609 and tokens[-1].endswith('>')):
1610 # "-o <value1 value2>" form; join all values into one token
1611 tokens[1:] = [' '.join(tokens[1:])]
1612 if 0 < len(tokens) <= 2:
1613 option = nodes.option(optionstring)
1614 option += nodes.option_string(tokens[0], tokens[0])
1615 if len(tokens) > 1:
1616 option += nodes.option_argument(tokens[1], tokens[1],
1617 delimiter=delimiter)
1618 optlist.append(option)
1619 else:
1620 raise MarkupError(
1621 'wrong number of option tokens (=%s), should be 1 or 2: '
1622 '"%s"' % (len(tokens), optionstring))
1623 return optlist
1624
1625 def doctest(self, match, context, next_state):
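        """A doctest block, stored verbatim in a doctest_block node."""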
1626 line = self.document.current_line
1627 data = '\n'.join(self.state_machine.get_text_block())
1628 # TODO: Parse with `directives.body.CodeBlock` with
1629 # argument 'pycon' (Python Console) in Docutils 1.0.
1630 n = nodes.doctest_block(data, data)
1631 n.line = line
1632 self.parent += n
1633 return [], next_state, []
1634
1635 def line_block(self, match, context, next_state):
1636 """First line of a line block."""
1637 block = nodes.line_block()
1638 self.parent += block
1639 lineno = self.state_machine.abs_line_number()
1640 (block.source,
1641 block.line) = self.state_machine.get_source_and_line(lineno)
1642 line, messages, blank_finish = self.line_block_line(match, lineno)
1643 block += line
1644 self.parent += messages
1645 if not blank_finish:
1646 offset = self.state_machine.line_offset + 1 # next line
1647 new_line_offset, blank_finish = self.nested_list_parse(
1648 self.state_machine.input_lines[offset:],
1649 input_offset=self.state_machine.abs_line_offset() + 1,
1650 node=block, initial_state='LineBlock',
1651 blank_finish=False)
1652 self.goto_line(new_line_offset)
1653 if not blank_finish:
1654 self.parent += self.reporter.warning(
1655 'Line block ends without a blank line.',
1656 line=lineno+1)
1657 if len(block):
1658 if block[0].indent is None:
1659 block[0].indent = 0
1660 self.nest_line_block_lines(block)
1661 return [], next_state, []
1662
1663 def line_block_line(self, match, lineno):
1664 """Return one line element of a line_block."""
1665 (indented, indent, line_offset, blank_finish
1666 ) = self.state_machine.get_first_known_indented(match.end(),
1667 until_blank=True)
1668 text = '\n'.join(indented)
1669 text_nodes, messages = self.inline_text(text, lineno)
1670 line = nodes.line(text, '', *text_nodes)
1671 (line.source,
1672 line.line) = self.state_machine.get_source_and_line(lineno)
1673 if match.string.rstrip() != '|': # not empty
1674 line.indent = len(match.group(1)) - 1
1675 return line, messages, blank_finish
1676
1677 def nest_line_block_lines(self, block) -> None:
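        """Fill in missing indents, then nest the lines by indentation."""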
1678 for index in range(1, len(block)):
1679 if block[index].indent is None:
1680 block[index].indent = block[index - 1].indent
1681 self.nest_line_block_segment(block)
1682
1683 def nest_line_block_segment(self, block) -> None:
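        """
        Recursively group lines indented more than the least indented
        line into nested line_block nodes.
        """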
1684 indents = [item.indent for item in block]
1685 least = min(indents)
1686 new_items = []
1687 new_block = nodes.line_block()
1688 for item in block:
1689 if item.indent > least:
1690 new_block.append(item)
1691 else:
1692 if len(new_block):
1693 self.nest_line_block_segment(new_block)
1694 new_items.append(new_block)
1695 new_block = nodes.line_block()
1696 new_items.append(item)
1697 if len(new_block):
1698 self.nest_line_block_segment(new_block)
1699 new_items.append(new_block)
1700 block[:] = new_items
1701
1702 def grid_table_top(self, match, context, next_state):
1703 """Top border of a full table."""
1704 return self.table_top(match, context, next_state,
1705 self.isolate_grid_table,
1706 tableparser.GridTableParser)
1707
1708 def simple_table_top(self, match, context, next_state):
1709 """Top border of a simple table."""
1710 return self.table_top(match, context, next_state,
1711 self.isolate_simple_table,
1712 tableparser.SimpleTableParser)
1713
1714 def table_top(self, match, context, next_state,
1715 isolate_function, parser_class):
1716 """Top border of a generic table."""
1717 nodelist, blank_finish = self.table(isolate_function, parser_class)
1718 self.parent += nodelist
1719 if not blank_finish:
1720 msg = self.reporter.warning(
1721 'Blank line required after table.',
1722 line=self.state_machine.abs_line_number()+1)
1723 self.parent += msg
1724 return [], next_state, []
1725
1726 def table(self, isolate_function, parser_class):
1727 """Parse a table."""
1728 block, messages, blank_finish = isolate_function()
1729 if block:
1730 try:
1731 parser = parser_class()
1732 tabledata = parser.parse(block)
1733 tableline = (self.state_machine.abs_line_number() - len(block)
1734 + 1)
1735 table = self.build_table(tabledata, tableline)
1736 nodelist = [table] + messages
1737 except tableparser.TableMarkupError as err:
1738 nodelist = self.malformed_table(block, ' '.join(err.args),
1739 offset=err.offset) + messages
1740 else:
1741 nodelist = messages
1742 return nodelist, blank_finish
1743
1744 def isolate_grid_table(self):
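        """
        Extract a grid table's text block; return (block, messages,
        "blank finish").  Return an empty block if the table is malformed.
        """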
1745 messages = []
1746 blank_finish = True
1747 try:
1748 block = self.state_machine.get_text_block(flush_left=True)
1749 except statemachine.UnexpectedIndentationError as err:
1750 block, src, srcline = err.args
1751 messages.append(self.reporter.error('Unexpected indentation.',
1752 source=src, line=srcline))
1753 blank_finish = False
1754 block.disconnect()
1755 # for East Asian chars:
1756 block.pad_double_width(self.double_width_pad_char)
1757 width = len(block[0].strip())
1758 for i in range(len(block)):
1759 block[i] = block[i].strip()
1760 if block[i][0] not in '+|': # check left edge
1761 blank_finish = False
1762 self.state_machine.previous_line(len(block) - i)
1763 del block[i:]
1764 break
1765 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1766 # from second-last to third line of table:
1767 for i in range(len(block) - 2, 1, -1):
1768 if self.grid_table_top_pat.match(block[i]):
1769 self.state_machine.previous_line(len(block) - i + 1)
1770 del block[i+1:]
1771 blank_finish = False
1772 break
1773 else:
1774 detail = 'Bottom border missing or corrupt.'
1775 messages.extend(self.malformed_table(block, detail, i))
1776 return [], messages, blank_finish
1777 for i in range(len(block)): # check right edge
1778 if len(block[i]) != width or block[i][-1] not in '+|':
1779 detail = 'Right border not aligned or missing.'
1780 messages.extend(self.malformed_table(block, detail, i))
1781 return [], messages, blank_finish
1782 return block, messages, blank_finish
1783
1784 def isolate_simple_table(self):
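        """
        Extract a simple table's text block (top border through bottom
        border); return (block, messages, "blank finish").
        """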
1785 start = self.state_machine.line_offset
1786 lines = self.state_machine.input_lines
1787 limit = len(lines) - 1
1788 toplen = len(lines[start].strip())
1789 pattern_match = self.simple_table_border_pat.match
1790 found = 0
1791 found_at = None
1792 i = start + 1
1793 while i <= limit:
1794 line = lines[i]
1795 match = pattern_match(line)
1796 if match:
1797 if len(line.strip()) != toplen:
1798 self.state_machine.next_line(i - start)
1799 messages = self.malformed_table(
1800 lines[start:i+1], 'Bottom border or header rule does '
1801 'not match top border.', i-start)
1802 return [], messages, i == limit or not lines[i+1].strip()
1803 found += 1
1804 found_at = i
1805 if found == 2 or i == limit or not lines[i+1].strip():
1806 end = i
1807 break
1808 i += 1
1809 else: # reached end of input_lines
1810 details = 'No bottom table border found'
1811 if found:
1812 details += ' or no blank line after table bottom'
1813 self.state_machine.next_line(found_at - start)
1814 block = lines[start:found_at+1]
1815 else:
1816 self.state_machine.next_line(i - start - 1)
1817 block = lines[start:]
1818 messages = self.malformed_table(block, details + '.')
1819 return [], messages, not found
1820 self.state_machine.next_line(end - start)
1821 block = lines[start:end+1]
1822 # for East Asian chars:
1823 block.pad_double_width(self.double_width_pad_char)
1824 return block, [], end == limit or not lines[end+1].strip()
1825
1826 def malformed_table(self, block, detail='', offset=0):
1827 block.replace(self.double_width_pad_char, '')
1828 data = '\n'.join(block)
1829 message = 'Malformed table.'
1830 startline = self.state_machine.abs_line_number() - len(block) + 1
1831 if detail:
1832 message += '\n' + detail
1833 error = self.reporter.error(message, nodes.literal_block(data, data),
1834 line=startline+offset)
1835 return [error]
1836
1837 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
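        """
        Return a table node built from `tabledata`:
        (column widths, header rows, body rows).
        """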
1838 colwidths, headrows, bodyrows = tabledata
1839 table = nodes.table()
1840 if widths == 'auto':
1841 table['classes'] += ['colwidths-auto']
1842 elif widths: # "grid" or list of integers
1843 table['classes'] += ['colwidths-given']
1844 tgroup = nodes.tgroup(cols=len(colwidths))
1845 table += tgroup
1846 for colwidth in colwidths:
1847 colspec = nodes.colspec(colwidth=colwidth)
1848 if stub_columns:
1849 colspec.attributes['stub'] = True
1850 stub_columns -= 1
1851 tgroup += colspec
1852 if headrows:
1853 thead = nodes.thead()
1854 tgroup += thead
1855 for row in headrows:
1856 thead += self.build_table_row(row, tableline)
1857 tbody = nodes.tbody()
1858 tgroup += tbody
1859 for row in bodyrows:
1860 tbody += self.build_table_row(row, tableline)
1861 return table
1862
1863 def build_table_row(self, rowdata, tableline):
1864 row = nodes.row()
1865 for cell in rowdata:
1866 if cell is None:
1867 continue
1868 morerows, morecols, offset, cellblock = cell
1869 attributes = {}
1870 if morerows:
1871 attributes['morerows'] = morerows
1872 if morecols:
1873 attributes['morecols'] = morecols
1874 entry = nodes.entry(**attributes)
1875 row += entry
1876 if ''.join(cellblock):
1877 self.nested_parse(cellblock, input_offset=tableline+offset,
1878 node=entry)
1879 return row
1880
1881 explicit = Struct()
1882 """Patterns and constants used for explicit markup recognition."""
1883
1884 explicit.patterns = Struct(
1885 target=re.compile(r"""
1886 (
1887 _ # anonymous target
1888 | # *OR*
1889 (?!_) # no underscore at the beginning
1890 (?P<quote>`?) # optional open quote
1891 (?![ `]) # first char. not space or
1892 # backquote
1893 (?P<name> # reference name
1894 .+?
1895 )
1896 %(non_whitespace_escape_before)s
1897 (?P=quote) # close quote if open quote used
1898 )
1899 (?<!(?<!\x00):) # no unescaped colon at end
1900 %(non_whitespace_escape_before)s
1901 [ ]? # optional space
1902 : # end of reference name
1903 ([ ]+|$) # followed by whitespace
1904 """ % vars(Inliner), re.VERBOSE),
1905 reference=re.compile(r"""
1906 (
1907 (?P<simple>%(simplename)s)_
1908 | # *OR*
1909 ` # open backquote
1910 (?![ ]) # not space
1911 (?P<phrase>.+?) # hyperlink phrase
1912 %(non_whitespace_escape_before)s
1913 `_ # close backquote,
1914 # reference mark
1915 )
1916 $ # end of string
1917 """ % vars(Inliner), re.VERBOSE),
1918 substitution=re.compile(r"""
1919 (
1920 (?![ ]) # first char. not space
1921 (?P<name>.+?) # substitution text
1922 %(non_whitespace_escape_before)s
1923 \| # close delimiter
1924 )
1925 ([ ]+|$) # followed by whitespace
1926 """ % vars(Inliner),
1927 re.VERBOSE),)
1928
1929 def footnote(self, match):
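        """
        Parse a footnote (manually numbered, auto-numbered, labeled
        auto-numbered, or auto-symbol); return ([footnote node],
        "blank finish").
        """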
1930 src, srcline = self.state_machine.get_source_and_line()
1931 (indented, indent, offset, blank_finish
1932 ) = self.state_machine.get_first_known_indented(match.end())
1933 label = match.group(1)
1934 name = normalize_name(label)
1935 footnote = nodes.footnote('\n'.join(indented))
1936 footnote.source = src
1937 footnote.line = srcline
1938 if name[0] == '#': # auto-numbered
1939 name = name[1:] # autonumber label
1940 footnote['auto'] = 1
1941 if name:
1942 footnote['names'].append(name)
1943 self.document.note_autofootnote(footnote)
1944 elif name == '*': # auto-symbol
1945 name = ''
1946 footnote['auto'] = '*'
1947 self.document.note_symbol_footnote(footnote)
1948 else: # manually numbered
1949 footnote += nodes.label('', label)
1950 footnote['names'].append(name)
1951 self.document.note_footnote(footnote)
1952 if name:
1953 self.document.note_explicit_target(footnote, footnote)
1954 else:
1955 self.document.set_id(footnote, footnote)
1956 if indented:
1957 self.nested_parse(indented, input_offset=offset, node=footnote)
1958 else:
1959 footnote += self.reporter.warning('Footnote content expected.')
1960 return [footnote], blank_finish
1961
1962 def citation(self, match):
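        """Parse a citation; return ([citation node], "blank finish")."""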
1963 src, srcline = self.state_machine.get_source_and_line()
1964 (indented, indent, offset, blank_finish
1965 ) = self.state_machine.get_first_known_indented(match.end())
1966 label = match.group(1)
1967 name = normalize_name(label)
1968 citation = nodes.citation('\n'.join(indented))
1969 citation.source = src
1970 citation.line = srcline
1971 citation += nodes.label('', label)
1972 citation['names'].append(name)
1973 self.document.note_citation(citation)
1974 self.document.note_explicit_target(citation, citation)
1975 if indented:
1976 self.nested_parse(indented, input_offset=offset, node=citation)
1977 else:
1978 citation += self.reporter.warning('Citation content expected.')
1979 return [citation], blank_finish
1980
1981 def hyperlink_target(self, match):
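        """
        Parse an explicit hyperlink target; return ([target node],
        "blank finish").
        """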
1982 pattern = self.explicit.patterns.target
1983 lineno = self.state_machine.abs_line_number()
1984 (block, indent, offset, blank_finish
1985 ) = self.state_machine.get_first_known_indented(
1986 match.end(), until_blank=True, strip_indent=False)
1987 blocktext = match.string[:match.end()] + '\n'.join(block)
1988 block = [escape2null(line) for line in block]
1989 escaped = block[0]
1990 blockindex = 0
1991 while True:
1992 targetmatch = pattern.match(escaped)
1993 if targetmatch:
1994 break
1995 blockindex += 1
1996 try:
1997 escaped += block[blockindex]
1998 except IndexError:
1999 raise MarkupError('malformed hyperlink target.')
2000 del block[:blockindex]
2001 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
2002 target = self.make_target(block, blocktext, lineno,
2003 targetmatch.group('name'))
2004 return [target], blank_finish
2005
2006 def make_target(self, block, block_text, lineno, target_name):
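        """Build and return a target node from a parsed hyperlink target."""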
2007 target_type, data = self.parse_target(block, block_text, lineno)
2008 if target_type == 'refname':
2009 target = nodes.target(block_text, '', refname=normalize_name(data))
2010 target.indirect_reference_name = data
2011 self.add_target(target_name, '', target, lineno)
2012 self.document.note_indirect_target(target)
2013 return target
2014 elif target_type == 'refuri':
2015 target = nodes.target(block_text, '')
2016 self.add_target(target_name, data, target, lineno)
2017 return target
2018 else:
2019 return data
2020
2021 def parse_target(self, block, block_text, lineno):
2022 """
2023 Determine the type of reference of a target.
2024
2025 :Return: A 2-tuple, one of:
2026
2027 - 'refname' and the indirect reference name
2028 - 'refuri' and the URI
2030 """
2031 if block and block[-1].strip()[-1:] == '_': # possible indirect target
2032 reference = ' '.join(line.strip() for line in block)
2033 refname = self.is_reference(reference)
2034 if refname:
2035 return 'refname', refname
2036 ref_parts = split_escaped_whitespace(' '.join(block))
2037 reference = ' '.join(''.join(unescape(part).split())
2038 for part in ref_parts)
2039 return 'refuri', reference
2040
2041 def is_reference(self, reference):
2042 match = self.explicit.patterns.reference.match(
2043 whitespace_normalize_name(reference))
2044 if not match:
2045 return None
2046 return unescape(match.group('simple') or match.group('phrase'))
2047
2048 def add_target(self, targetname, refuri, target, lineno):
2049 target.line = lineno
2050 if targetname:
2051 name = normalize_name(unescape(targetname))
2052 target['names'].append(name)
2053 if refuri:
2054 uri = self.inliner.adjust_uri(refuri)
2055 if uri:
2056 target['refuri'] = uri
2057 else:
2058 raise ApplicationError('problem with URI: %r' % refuri)
2059 self.document.note_explicit_target(target, self.parent)
2060 else: # anonymous target
2061 if refuri:
2062 target['refuri'] = refuri
2063 target['anonymous'] = True
2064 self.document.note_anonymous_target(target)
2065
2066 def substitution_def(self, match):
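        """
        Parse a substitution definition; return ([substitution_definition
        node] or [system_message], "blank finish").
        """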
2067 pattern = self.explicit.patterns.substitution
2068 src, srcline = self.state_machine.get_source_and_line()
2069 (block, indent, offset, blank_finish
2070 ) = self.state_machine.get_first_known_indented(match.end(),
2071 strip_indent=False)
2072 blocktext = (match.string[:match.end()] + '\n'.join(block))
2073 block.disconnect()
2074 escaped = escape2null(block[0].rstrip())
2075 blockindex = 0
2076 while True:
2077 subdefmatch = pattern.match(escaped)
2078 if subdefmatch:
2079 break
2080 blockindex += 1
2081 try:
2082 escaped = escaped + ' ' + escape2null(
2083 block[blockindex].strip())
2084 except IndexError:
2085 raise MarkupError('malformed substitution definition.')
2086 del block[:blockindex] # strip out the substitution marker
2087 start = subdefmatch.end()-len(escaped)-1
2088 block[0] = (block[0].strip() + ' ')[start:-1]
2089 if not block[0]:
2090 del block[0]
2091 offset += 1
2092 while block and not block[-1].strip():
2093 block.pop()
2094 subname = subdefmatch.group('name')
2095 substitution_node = nodes.substitution_definition(blocktext)
2096 substitution_node.source = src
2097 substitution_node.line = srcline
2098 if not block:
2099 msg = self.reporter.warning(
2100 'Substitution definition "%s" missing contents.' % subname,
2101 nodes.literal_block(blocktext, blocktext),
2102 source=src, line=srcline)
2103 return [msg], blank_finish
2104 block[0] = block[0].strip()
2105 substitution_node['names'].append(
2106 nodes.whitespace_normalize_name(subname))
2107 new_abs_offset, blank_finish = self.nested_list_parse(
2108 block, input_offset=offset, node=substitution_node,
2109 initial_state='SubstitutionDef', blank_finish=blank_finish)
2110 i = 0
2111 for node in substitution_node[:]:
            if not isinstance(node, (nodes.Inline, nodes.Text)):
2114 self.parent += substitution_node[i]
2115 del substitution_node[i]
2116 else:
2117 i += 1
2118 for node in substitution_node.findall(nodes.Element):
2119 if self.disallowed_inside_substitution_definitions(node):
2120 pformat = nodes.literal_block('', node.pformat().rstrip())
2121 msg = self.reporter.error(
2122 'Substitution definition contains illegal element <%s>:'
2123 % node.tagname,
2124 pformat, nodes.literal_block(blocktext, blocktext),
2125 source=src, line=srcline)
2126 return [msg], blank_finish
2127 if len(substitution_node) == 0:
2128 msg = self.reporter.warning(
2129 'Substitution definition "%s" empty or invalid.' % subname,
2130 nodes.literal_block(blocktext, blocktext),
2131 source=src, line=srcline)
2132 return [msg], blank_finish
2133 self.document.note_substitution_def(
2134 substitution_node, subname, self.parent)
2135 return [substitution_node], blank_finish
2136
2137 def disallowed_inside_substitution_definitions(self, node) -> bool:
2138 if (node['ids']
2139 or isinstance(node, nodes.reference) and node.get('anonymous')
2140 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2141 return True
2142 else:
2143 return False
2144
2145 def directive(self, match, **option_presets):
2146 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2147 type_name = match.group(1)
2148 directive_class, messages = directives.directive(
2149 type_name, self.memo.language, self.document)
2150 self.parent += messages
2151 if directive_class:
2152 return self.run_directive(
2153 directive_class, match, type_name, option_presets)
2154 else:
2155 return self.unknown_directive(type_name)
2156
2157 def run_directive(self, directive, match, type_name, option_presets):
2158 """
2159 Parse a directive then run its directive function.
2160
2161 Parameters:
2162
2163 - `directive`: The class implementing the directive. Must be
2164 a subclass of `rst.Directive`.
2165
2166 - `match`: A regular expression match object which matched the first
2167 line of the directive.
2168
2169 - `type_name`: The directive name, as used in the source text.
2170
2171 - `option_presets`: A dictionary of preset options, defaults for the
2172 directive options. Currently, only an "alt" option is passed by
2173 substitution definitions (value: the substitution name), which may
2174 be used by an embedded image directive.
2175
2176 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2177 """
2178 if isinstance(directive, (FunctionType, MethodType)):
2179 from docutils.parsers.rst import convert_directive_function
2180 directive = convert_directive_function(directive)
2181 lineno = self.state_machine.abs_line_number()
2182 initial_line_offset = self.state_machine.line_offset
2183 (indented, indent, line_offset, blank_finish
2184 ) = self.state_machine.get_first_known_indented(match.end(),
2185 strip_top=0)
2186 block_text = '\n'.join(self.state_machine.input_lines[
2187 initial_line_offset : self.state_machine.line_offset + 1]) # noqa: E203,E501
2188 try:
2189 arguments, options, content, content_offset = (
2190 self.parse_directive_block(indented, line_offset,
2191 directive, option_presets))
2192 except MarkupError as detail:
2193 error = self.reporter.error(
2194 'Error in "%s" directive:\n%s.' % (type_name,
2195 ' '.join(detail.args)),
2196 nodes.literal_block(block_text, block_text), line=lineno)
2197 return [error], blank_finish
2198 directive_instance = directive(
2199 type_name, arguments, options, content, lineno,
2200 content_offset, block_text, self, self.state_machine)
2201 try:
2202 result = directive_instance.run()
2203 except docutils.parsers.rst.DirectiveError as error:
2204 msg_node = self.reporter.system_message(error.level, error.msg,
2205 line=lineno)
2206 msg_node += nodes.literal_block(block_text, block_text)
2207 result = [msg_node]
2208 assert isinstance(result, list), \
2209 'Directive "%s" must return a list of nodes.' % type_name
2210 for i in range(len(result)):
2211 assert isinstance(result[i], nodes.Node), \
2212 ('Directive "%s" returned non-Node object (index %s): %r'
2213 % (type_name, i, result[i]))
2214 return (result,
2215 blank_finish or self.state_machine.is_next_line_blank())
2216
2217 def parse_directive_block(self, indented, line_offset, directive,
2218 option_presets):
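        """
        Parse the directive's indented block into
        (arguments, options, content, content offset).
        """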
2219 option_spec = directive.option_spec
2220 has_content = directive.has_content
2221 if indented and not indented[0].strip():
2222 indented.trim_start()
2223 line_offset += 1
2224 while indented and not indented[-1].strip():
2225 indented.trim_end()
2226 if indented and (directive.required_arguments
2227 or directive.optional_arguments
2228 or option_spec):
2229 for i, line in enumerate(indented):
2230 if not line.strip():
2231 break
2232 else:
2233 i += 1
2234 arg_block = indented[:i]
2235 content = indented[i+1:]
2236 content_offset = line_offset + i + 1
2237 else:
2238 content = indented
2239 content_offset = line_offset
2240 arg_block = []
2241 if option_spec:
2242 options, arg_block = self.parse_directive_options(
2243 option_presets, option_spec, arg_block)
2244 else:
2245 options = {}
2246 if arg_block and not (directive.required_arguments
2247 or directive.optional_arguments):
2248 content = arg_block + indented[i:]
2249 content_offset = line_offset
2250 arg_block = []
2251 while content and not content[0].strip():
2252 content.trim_start()
2253 content_offset += 1
2254 if directive.required_arguments or directive.optional_arguments:
2255 arguments = self.parse_directive_arguments(
2256 directive, arg_block)
2257 else:
2258 arguments = []
2259 if content and not has_content:
2260 raise MarkupError('no content permitted')
2261 return arguments, options, content, content_offset
2262
2263 def parse_directive_options(self, option_presets, option_spec, arg_block):
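        """
        Split the option field lines off `arg_block` and parse them;
        return (options dict, remaining argument block).
        """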
2264 options = option_presets.copy()
2265 for i, line in enumerate(arg_block):
2266 if re.match(Body.patterns['field_marker'], line):
2267 opt_block = arg_block[i:]
2268 arg_block = arg_block[:i]
2269 break
2270 else:
2271 opt_block = []
2272 if opt_block:
2273 success, data = self.parse_extension_options(option_spec,
2274 opt_block)
2275 if success: # data is a dict of options
2276 options.update(data)
2277 else: # data is an error string
2278 raise MarkupError(data)
2279 return options, arg_block
2280
2281 def parse_directive_arguments(self, directive, arg_block):
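        """
        Return the directive's arguments parsed from `arg_block`;
        raise `MarkupError` if too few or too many are supplied.
        """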
2282 required = directive.required_arguments
2283 optional = directive.optional_arguments
2284 arg_text = '\n'.join(arg_block)
2285 arguments = arg_text.split()
2286 if len(arguments) < required:
2287 raise MarkupError('%s argument(s) required, %s supplied'
2288 % (required, len(arguments)))
2289 elif len(arguments) > required + optional:
2290 if directive.final_argument_whitespace:
2291 arguments = arg_text.split(None, required + optional - 1)
2292 else:
2293 raise MarkupError(
2294 'maximum %s argument(s) allowed, %s supplied'
2295 % (required + optional, len(arguments)))
2296 return arguments
2297
2298 def parse_extension_options(self, option_spec, datalines):
2299 """
2300 Parse `datalines` for a field list containing extension options
2301 matching `option_spec`.
2302
2303 :Parameters:
2304 - `option_spec`: a mapping of option name to conversion
2305 function, which should raise an exception on bad input.
2306 - `datalines`: a list of input strings.
2307
2308 :Return:
2309 - Success value, 1 or 0.
2310 - An option dictionary on success, an error string on failure.
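
        For example, ``option_spec={'alt': directives.unchanged}`` and
        ``datalines=[':alt: a caption']`` should yield the option
        dictionary ``{'alt': 'a caption'}`` on success.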
2311 """
2312 node = nodes.field_list()
2313 newline_offset, blank_finish = self.nested_list_parse(
2314 datalines, 0, node, initial_state='ExtensionOptions',
2315 blank_finish=True)
2316 if newline_offset != len(datalines): # incomplete parse of block
2317 return 0, 'invalid option block'
2318 try:
2319 options = utils.extract_extension_options(node, option_spec)
2320 except KeyError as detail:
2321 return 0, 'unknown option: "%s"' % detail.args[0]
2322 except (ValueError, TypeError) as detail:
2323 return 0, 'invalid option value: %s' % ' '.join(detail.args)
2324 except utils.ExtensionOptionError as detail:
2325 return 0, 'invalid option data: %s' % ' '.join(detail.args)
2326 if blank_finish:
2327 return 1, options
2328 else:
2329 return 0, 'option data incompletely parsed'
2330
2331 def unknown_directive(self, type_name):
2332 lineno = self.state_machine.abs_line_number()
2333 (indented, indent, offset, blank_finish
2334 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2335 text = '\n'.join(indented)
2336 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2337 nodes.literal_block(text, text),
2338 line=lineno)
2339 return [error], blank_finish
2340
2341 def comment(self, match):
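        """
        Parse an explicit-markup comment; return (node list, "blank finish").
        """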
2342 if self.state_machine.is_next_line_blank():
2343 first_comment_line = match.string[match.end():]
2344 if not first_comment_line.strip(): # empty comment
2345 return [nodes.comment()], True # "A tiny but practical wart."
2346 if first_comment_line.startswith('end of inclusion from "'):
2347 # cf. parsers.rst.directives.misc.Include
2348 self.document.include_log.pop()
2349 return [], True
2350 (indented, indent, offset, blank_finish
2351 ) = self.state_machine.get_first_known_indented(match.end())
2352 while indented and not indented[-1].strip():
2353 indented.trim_end()
2354 text = '\n'.join(indented)
2355 return [nodes.comment(text, text)], blank_finish
2356
2357 explicit.constructs = [
2358 (footnote,
2359 re.compile(r"""
2360 \.\.[ ]+ # explicit markup start
2361 \[
2362 ( # footnote label:
2363 [0-9]+ # manually numbered footnote
2364 | # *OR*
2365 \# # anonymous auto-numbered footnote
2366 | # *OR*
                          \#%s            # auto-numbered footnote with label
2368 | # *OR*
2369 \* # auto-symbol footnote
2370 )
2371 \]
2372 ([ ]+|$) # whitespace or end of line
2373 """ % Inliner.simplename, re.VERBOSE)),
2374 (citation,
2375 re.compile(r"""
2376 \.\.[ ]+ # explicit markup start
2377 \[(%s)\] # citation label
2378 ([ ]+|$) # whitespace or end of line
2379 """ % Inliner.simplename, re.VERBOSE)),
2380 (hyperlink_target,
2381 re.compile(r"""
2382 \.\.[ ]+ # explicit markup start
2383 _ # target indicator
2384 (?![ ]|$) # first char. not space or EOL
2385 """, re.VERBOSE)),
2386 (substitution_def,
2387 re.compile(r"""
2388 \.\.[ ]+ # explicit markup start
2389 \| # substitution indicator
2390 (?![ ]|$) # first char. not space or EOL
2391 """, re.VERBOSE)),
2392 (directive,
2393 re.compile(r"""
2394 \.\.[ ]+ # explicit markup start
2395 (%s) # directive name
2396 [ ]? # optional space
2397 :: # directive delimiter
2398 ([ ]+|$) # whitespace or end of line
2399 """ % Inliner.simplename, re.VERBOSE))]
2400
2401 def explicit_markup(self, match, context, next_state):
2402 """Footnotes, hyperlink targets, directives, comments."""
2403 nodelist, blank_finish = self.explicit_construct(match)
2404 self.parent += nodelist
2405 self.explicit_list(blank_finish)
2406 return [], next_state, []
2407
2408 def explicit_construct(self, match):
2409 """Determine which explicit construct this is, parse & return it."""
2410 errors = []
2411 for method, pattern in self.explicit.constructs:
2412 expmatch = pattern.match(match.string)
2413 if expmatch:
2414 try:
2415 return method(self, expmatch)
2416 except MarkupError as error:
2417 lineno = self.state_machine.abs_line_number()
2418 message = ' '.join(error.args)
2419 errors.append(self.reporter.warning(message, line=lineno))
2420 break
2421 nodelist, blank_finish = self.comment(match)
2422 return nodelist + errors, blank_finish
2423
2424 def explicit_list(self, blank_finish) -> None:
2425 """
2426 Create a nested state machine for a series of explicit markup
2427 constructs (including anonymous hyperlink targets).
2428 """
2429 offset = self.state_machine.line_offset + 1 # next line
2430 newline_offset, blank_finish = self.nested_list_parse(
2431 self.state_machine.input_lines[offset:],
2432 input_offset=self.state_machine.abs_line_offset() + 1,
2433 node=self.parent, initial_state='Explicit',
2434 blank_finish=blank_finish,
2435 match_titles=self.state_machine.match_titles)
2436 self.goto_line(newline_offset)
2437 if not blank_finish:
2438 self.parent += self.unindent_warning('Explicit markup')
2439
2440 def anonymous(self, match, context, next_state):
2441 """Anonymous hyperlink targets."""
2442 nodelist, blank_finish = self.anonymous_target(match)
2443 self.parent += nodelist
2444 self.explicit_list(blank_finish)
2445 return [], next_state, []
2446
2447 def anonymous_target(self, match):
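        """
        Parse an anonymous hyperlink target; return ([target node],
        "blank finish").
        """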
2448 lineno = self.state_machine.abs_line_number()
2449 (block, indent, offset, blank_finish
2450 ) = self.state_machine.get_first_known_indented(match.end(),
2451 until_blank=True)
2452 blocktext = match.string[:match.end()] + '\n'.join(block)
2453 block = [escape2null(line) for line in block]
2454 target = self.make_target(block, blocktext, lineno, '')
2455 return [target], blank_finish
2456
2457 def line(self, match, context, next_state):
2458 """Section title overline or transition marker."""
2459 if self.state_machine.match_titles:
2460 return [match.string], 'Line', []
2461 elif match.string.strip() == '::':
2462 raise statemachine.TransitionCorrection('text')
2463 elif len(match.string.strip()) < 4:
2464 msg = self.reporter.info(
2465 'Unexpected possible title overline or transition.\n'
2466 "Treating it as ordinary text because it's so short.",
2467 line=self.state_machine.abs_line_number())
2468 self.parent += msg
2469 raise statemachine.TransitionCorrection('text')
2470 else:
2471 blocktext = self.state_machine.line
2472 msg = self.reporter.error(
2473 'Unexpected section title or transition.',
2474 nodes.literal_block(blocktext, blocktext),
2475 line=self.state_machine.abs_line_number())
2476 self.parent += msg
2477 return [], next_state, []
2478
2479 def text(self, match, context, next_state):
2480 """Titles, definition lists, paragraphs."""
2481 return [match.string], 'Text', []
2482
2483
2484class RFC2822Body(Body):
2485
2486 """
2487 RFC2822 headers are only valid as the first constructs in documents. As
2488 soon as anything else appears, the `Body` state should take over.
2489 """
2490
2491 patterns = Body.patterns.copy() # can't modify the original
2492 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2493 initial_transitions = [(name, 'Body')
2494 for name in Body.initial_transitions]
2495 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2496
2497 def rfc2822(self, match, context, next_state):
2498 """RFC2822-style field list item."""
2499 fieldlist = nodes.field_list(classes=['rfc2822'])
2500 self.parent += fieldlist
2501 field, blank_finish = self.rfc2822_field(match)
2502 fieldlist += field
2503 offset = self.state_machine.line_offset + 1 # next line
2504 newline_offset, blank_finish = self.nested_list_parse(
2505 self.state_machine.input_lines[offset:],
2506 input_offset=self.state_machine.abs_line_offset() + 1,
2507 node=fieldlist, initial_state='RFC2822List',
2508 blank_finish=blank_finish)
2509 self.goto_line(newline_offset)
2510 if not blank_finish:
2511 self.parent += self.unindent_warning(
2512 'RFC2822-style field list')
2513 return [], next_state, []
2514
2515 def rfc2822_field(self, match):
2516 name = match.string[:match.string.find(':')]
2517 (indented, indent, line_offset, blank_finish
2518 ) = self.state_machine.get_first_known_indented(match.end(),
2519 until_blank=True)
2520 fieldnode = nodes.field()
2521 fieldnode += nodes.field_name(name, name)
2522 fieldbody = nodes.field_body('\n'.join(indented))
2523 fieldnode += fieldbody
2524 if indented:
2525 self.nested_parse(indented, input_offset=line_offset,
2526 node=fieldbody)
2527 return fieldnode, blank_finish
2528
2529
2530class SpecializedBody(Body):
2531
2532 """
2533 Superclass for second and subsequent compound element members. Compound
2534 elements are lists and list-like constructs.
2535
2536 All transition methods are disabled (redefined as `invalid_input`).
2537 Override individual methods in subclasses to re-enable.
2538
2539 For example, once an initial bullet list item, say, is recognized, the
2540 `BulletList` subclass takes over, with a "bullet_list" node as its
2541 container. Upon encountering the initial bullet list item, `Body.bullet`
2542 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2543 starts up a nested parsing session with `BulletList` as the initial state.
2544 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2545 as only bullet list items are encountered, they are parsed and inserted
2546 into the container. The first construct which is *not* a bullet list item
2547 triggers the `invalid_input` method, which ends the nested parse and
2548 closes the container. `BulletList` needs to recognize input that is
2549 invalid in the context of a bullet list, which means everything *other
2550 than* bullet list items, so it inherits the transition list created in
2551 `Body`.
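
    For example, given the input::

        - first item
        - second item

        A paragraph.

    `Body.bullet` parses the first item and starts the nested parse;
    `BulletList.bullet` parses the second item; the paragraph is not a
    bullet list item, so it triggers `invalid_input` and closes the list.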
2552 """
2553
2554 def invalid_input(self, match=None, context=None, next_state=None):
2555 """Not a compound element member. Abort this state machine."""
2556 self.state_machine.previous_line() # back up so parent SM can reassess
2557 raise EOFError
2558
2559 indent = invalid_input
2560 bullet = invalid_input
2561 enumerator = invalid_input
2562 field_marker = invalid_input
2563 option_marker = invalid_input
2564 doctest = invalid_input
2565 line_block = invalid_input
2566 grid_table_top = invalid_input
2567 simple_table_top = invalid_input
2568 explicit_markup = invalid_input
2569 anonymous = invalid_input
2570 line = invalid_input
2571 text = invalid_input
2572
2573
2574class BulletList(SpecializedBody):
2575
2576 """Second and subsequent bullet_list list_items."""
2577
2578 def bullet(self, match, context, next_state):
2579 """Bullet list item."""
2580 if match.string[0] != self.parent['bullet']:
2581 # different bullet: new list
2582 self.invalid_input()
2583 listitem, blank_finish = self.list_item(match.end())
2584 self.parent += listitem
2585 self.blank_finish = blank_finish
2586 return [], next_state, []
2587
2588
2589class DefinitionList(SpecializedBody):
2590
2591 """Second and subsequent definition_list_items."""
2592
2593 def text(self, match, context, next_state):
2594 """Definition lists."""
2595 return [match.string], 'Definition', []
2596
2597
2598class EnumeratedList(SpecializedBody):
2599
2600 """Second and subsequent enumerated_list list_items."""
2601
2602 def enumerator(self, match, context, next_state):
2603 """Enumerated list item."""
2604 format, sequence, text, ordinal = self.parse_enumerator(
2605 match, self.parent['enumtype'])
2606 if (format != self.format
2607 or (sequence != '#' and (sequence != self.parent['enumtype']
2608 or self.auto
2609 or ordinal != (self.lastordinal + 1)))
2610 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2611 # different enumeration: new list
2612 self.invalid_input()
2613 if sequence == '#':
2614 self.auto = 1
2615 listitem, blank_finish = self.list_item(match.end())
2616 self.parent += listitem
2617 self.blank_finish = blank_finish
2618 self.lastordinal = ordinal
2619 return [], next_state, []
2620
2621
2622class FieldList(SpecializedBody):
2623
2624 """Second and subsequent field_list fields."""
2625
2626 def field_marker(self, match, context, next_state):
2627 """Field list field."""
2628 field, blank_finish = self.field(match)
2629 self.parent += field
2630 self.blank_finish = blank_finish
2631 return [], next_state, []
2632
2633
2634class OptionList(SpecializedBody):
2635
2636 """Second and subsequent option_list option_list_items."""
2637
2638 def option_marker(self, match, context, next_state):
2639 """Option list item."""
2640 try:
2641 option_list_item, blank_finish = self.option_list_item(match)
2642 except MarkupError:
2643 self.invalid_input()
2644 self.parent += option_list_item
2645 self.blank_finish = blank_finish
2646 return [], next_state, []
2647
2648
2649class RFC2822List(SpecializedBody, RFC2822Body):
2650
2651 """Second and subsequent RFC2822-style field_list fields."""
2652
2653 patterns = RFC2822Body.patterns
2654 initial_transitions = RFC2822Body.initial_transitions
2655
2656 def rfc2822(self, match, context, next_state):
2657 """RFC2822-style field list item."""
2658 field, blank_finish = self.rfc2822_field(match)
2659 self.parent += field
2660 self.blank_finish = blank_finish
2661 return [], 'RFC2822List', []
2662
2663 blank = SpecializedBody.invalid_input
2664
2665
2666class ExtensionOptions(FieldList):
2667
2668 """
2669 Parse field_list fields for extension options.
2670
2671 No nested parsing is done (including inline markup parsing).
2672 """
2673
2674 def parse_field_body(self, indented, offset, node) -> None:
2675 """Override `Body.parse_field_body` for simpler parsing."""
2676 lines = []
2677 for line in list(indented) + ['']:
2678 if line.strip():
2679 lines.append(line)
2680 elif lines:
2681 text = '\n'.join(lines)
2682 node += nodes.paragraph(text, text)
2683 lines = []
2684
2685
2686class LineBlock(SpecializedBody):
2687
2688 """Second and subsequent lines of a line_block."""
2689
2690 blank = SpecializedBody.invalid_input
2691
2692 def line_block(self, match, context, next_state):
2693 """New line of line block."""
2694 lineno = self.state_machine.abs_line_number()
2695 line, messages, blank_finish = self.line_block_line(match, lineno)
2696 self.parent += line
2697 self.parent.parent += messages
2698 self.blank_finish = blank_finish
2699 return [], next_state, []
2700
2701
2702class Explicit(SpecializedBody):
2703
2704 """Second and subsequent explicit markup construct."""
2705
2706 def explicit_markup(self, match, context, next_state):
2707 """Footnotes, hyperlink targets, directives, comments."""
2708 nodelist, blank_finish = self.explicit_construct(match)
2709 self.parent += nodelist
2710 self.blank_finish = blank_finish
2711 return [], next_state, []
2712
2713 def anonymous(self, match, context, next_state):
2714 """Anonymous hyperlink targets."""
2715 nodelist, blank_finish = self.anonymous_target(match)
2716 self.parent += nodelist
2717 self.blank_finish = blank_finish
2718 return [], next_state, []
2719
2720 blank = SpecializedBody.invalid_input
2721
2722
2723class SubstitutionDef(Body):
2724
2725 """
2726 Parser for the contents of a substitution_definition element.
2727 """
2728
2729 patterns = {
2730 'embedded_directive': re.compile(r'(%s)::( +|$)'
2731 % Inliner.simplename),
2732 'text': r''}
2733 initial_transitions = ['embedded_directive', 'text']
2734
2735 def embedded_directive(self, match, context, next_state):
2736 nodelist, blank_finish = self.directive(match,
2737 alt=self.parent['names'][0])
2738 self.parent += nodelist
2739 if not self.state_machine.at_eof():
2740 self.blank_finish = blank_finish
2741 raise EOFError
2742
2743 def text(self, match, context, next_state):
2744 if not self.state_machine.at_eof():
2745 self.blank_finish = self.state_machine.is_next_line_blank()
2746 raise EOFError
2747
2748
2749class Text(RSTState):
2750
2751 """
2752 Classifier of second line of a text block.
2753
2754 Could be a paragraph, a definition list item, or a title.
2755 """
2756
2757 patterns = {'underline': Body.patterns['line'],
2758 'text': r''}
2759 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2760
2761 def blank(self, match, context, next_state):
2762 """End of paragraph."""
2763 # NOTE: self.paragraph returns [node, system_message(s)], literalnext
2764 paragraph, literalnext = self.paragraph(
2765 context, self.state_machine.abs_line_number() - 1)
2766 self.parent += paragraph
2767 if literalnext:
2768 self.parent += self.literal_block()
2769 return [], 'Body', []
2770
2771 def eof(self, context):
2772 if context:
2773 self.blank(None, context, None)
2774 return []
2775
2776 def indent(self, match, context, next_state):
2777 """Definition list item."""
2778 dl = nodes.definition_list()
2779 # the definition list starts on the line before the indent:
2780 lineno = self.state_machine.abs_line_number() - 1
2781 dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
2782 dl_item, blank_finish = self.definition_list_item(context)
2783 dl += dl_item
2784 self.parent += dl
2785 offset = self.state_machine.line_offset + 1 # next line
2786 newline_offset, blank_finish = self.nested_list_parse(
2787 self.state_machine.input_lines[offset:],
2788 input_offset=self.state_machine.abs_line_offset() + 1,
2789 node=dl, initial_state='DefinitionList',
2790 blank_finish=blank_finish, blank_finish_state='Definition')
2791 self.goto_line(newline_offset)
2792 if not blank_finish:
2793 self.parent += self.unindent_warning('Definition list')
2794 return [], 'Body', []
2795
2796 def underline(self, match, context, next_state):
2797 """Section title."""
2798 lineno = self.state_machine.abs_line_number()
2799 title = context[0].rstrip()
2800 underline = match.string.rstrip()
2801 source = title + '\n' + underline
2802 messages = []
2803 if column_width(title) > len(underline):
2804 if len(underline) < 4:
2805 if self.state_machine.match_titles:
2806 msg = self.reporter.info(
2807 'Possible title underline, too short for the title.\n'
2808 "Treating it as ordinary text because it's so short.",
2809 line=lineno)
2810 self.parent += msg
2811 raise statemachine.TransitionCorrection('text')
2812 else:
2813 blocktext = context[0] + '\n' + self.state_machine.line
2814 msg = self.reporter.warning(
2815 'Title underline too short.',
2816 nodes.literal_block(blocktext, blocktext),
2817 line=lineno)
2818 messages.append(msg)
2819 if not self.state_machine.match_titles:
2820 blocktext = context[0] + '\n' + self.state_machine.line
2821 # We need get_source_and_line() here to report correctly
2822 src, srcline = self.state_machine.get_source_and_line()
2823 # TODO: why is abs_line_number() == srcline+1
2824 # if the error is in a table (try with test_tables.py)?
2825 # print("get_source_and_line", srcline)
2826 # print("abs_line_number", self.state_machine.abs_line_number())
2827 msg = self.reporter.error(
2828 'Unexpected section title.',
2829 nodes.literal_block(blocktext, blocktext),
2830 source=src, line=srcline)
2831 self.parent += messages
2832 self.parent += msg
2833 return [], next_state, []
2834 style = underline[0]
2835 context[:] = []
2836 self.section(title, source, style, lineno - 1, messages)
2837 return [], next_state, []
2838
2839 def text(self, match, context, next_state):
2840 """Paragraph."""
2841 startline = self.state_machine.abs_line_number() - 1
2842 msg = None
2843 try:
2844 block = self.state_machine.get_text_block(flush_left=True)
2845 except statemachine.UnexpectedIndentationError as err:
2846 block, src, srcline = err.args
2847 msg = self.reporter.error('Unexpected indentation.',
2848 source=src, line=srcline)
2849 lines = context + list(block)
2850 paragraph, literalnext = self.paragraph(lines, startline)
2851 self.parent += paragraph
2852 self.parent += msg
2853 if literalnext:
2854 try:
2855 self.state_machine.next_line()
2856 except EOFError:
2857 pass
2858 self.parent += self.literal_block()
2859 return [], next_state, []
2860
2861 def literal_block(self):
        """
        Return a list of nodes: a literal_block (possibly followed by an
        "unindent" warning) or the parsed contents of a quoted literal
        block.
        """
2863 (indented, indent, offset, blank_finish
2864 ) = self.state_machine.get_indented()
2865 while indented and not indented[-1].strip():
2866 indented.trim_end()
2867 if not indented:
2868 return self.quoted_literal_block()
2869 data = '\n'.join(indented)
2870 literal_block = nodes.literal_block(data, data)
2871 (literal_block.source,
2872 literal_block.line) = self.state_machine.get_source_and_line(offset+1)
2873 nodelist = [literal_block]
2874 if not blank_finish:
2875 nodelist.append(self.unindent_warning('Literal block'))
2876 return nodelist
2877
2878 def quoted_literal_block(self):
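        """
        Parse a quoted (unindented) literal block with a nested
        `QuotedLiteralBlock` state machine; return its child nodes.
        """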
2879 abs_line_offset = self.state_machine.abs_line_offset()
2880 offset = self.state_machine.line_offset
2881 parent_node = nodes.Element()
2882 new_abs_offset = self.nested_parse(
2883 self.state_machine.input_lines[offset:],
2884 input_offset=abs_line_offset, node=parent_node, match_titles=False,
2885 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2886 'initial_state': 'QuotedLiteralBlock'})
2887 self.goto_line(new_abs_offset)
2888 return parent_node.children
2889
2890 def definition_list_item(self, termline):
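        """
        Parse one definition_list_item from `termline` (the term) and the
        following indented block (the definition); return (node, "blank
        finish").
        """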
2891 # the parser is already on the second (indented) line:
2892 dd_lineno = self.state_machine.abs_line_number()
2893 dt_lineno = dd_lineno - 1
2894 (indented, indent, line_offset, blank_finish
2895 ) = self.state_machine.get_indented()
2896 dl_item = nodes.definition_list_item(
2897 '\n'.join(termline + list(indented)))
2898 (dl_item.source,
2899 dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
2900 dt_nodes, messages = self.term(termline, dt_lineno)
2901 dl_item += dt_nodes
2902 dd = nodes.definition('', *messages)
2903 dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
2904 dl_item += dd
2905 if termline[0][-2:] == '::':
2906 dd += self.reporter.info(
2907 'Blank line missing before literal block (after the "::")? '
2908 'Interpreted as a definition list item.',
2909 line=dd_lineno)
2910 # TODO: drop a definition if it is an empty comment to allow
2911 # definition list items with several terms?
2912 # https://sourceforge.net/p/docutils/feature-requests/60/
2913 self.nested_parse(indented, input_offset=line_offset, node=dd)
2914 return dl_item, blank_finish
2915
2916 classifier_delimiter = re.compile(' +: +')
2917
2918 def term(self, lines, lineno):
2919 """Return a definition_list's term and optional classifiers."""
2920 assert len(lines) == 1
2921 text_nodes, messages = self.inline_text(lines[0], lineno)
2922 dt = nodes.term(lines[0])
2923 dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
2924 node_list = [dt]
2925 for i in range(len(text_nodes)):
2926 node = text_nodes[i]
2927 if isinstance(node, nodes.Text):
2928 parts = self.classifier_delimiter.split(node)
2929 if len(parts) == 1:
2930 node_list[-1] += node
2931 else:
2932 text = parts[0].rstrip()
2933 textnode = nodes.Text(text)
2934 node_list[-1] += textnode
2935 node_list += [nodes.classifier(unescape(part, True), part)
2936 for part in parts[1:]]
2937 else:
2938 node_list[-1] += node
2939 return node_list, messages
2940
2941
2942class SpecializedText(Text):
2943
2944 """
2945 Superclass for second and subsequent lines of Text-variants.
2946
2947 All transition methods are disabled. Override individual methods in
2948 subclasses to re-enable.
2949 """
2950
2951 def eof(self, context):
2952 """Incomplete construct."""
2953 return []
2954
2955 def invalid_input(self, match=None, context=None, next_state=None):
2956 """Not a compound element member. Abort this state machine."""
2957 raise EOFError
2958
2959 blank = invalid_input
2960 indent = invalid_input
2961 underline = invalid_input
2962 text = invalid_input
2963
2964
2965class Definition(SpecializedText):
2966
2967 """Second line of potential definition_list_item."""
2968
2969 def eof(self, context):
2970 """Not a definition."""
2971 self.state_machine.previous_line(2) # so parent SM can reassess
2972 return []
2973
2974 def indent(self, match, context, next_state):
2975 """Definition list item."""
2976 dl_item, blank_finish = self.definition_list_item(context)
2977 self.parent += dl_item
2978 self.blank_finish = blank_finish
2979 return [], 'DefinitionList', []
2980
2981
2982class Line(SpecializedText):
2983
2984 """
2985 Second line of over- & underlined section title or transition marker.
2986 """
2987
2988 eofcheck = 1 # ignored, will be removed in Docutils 2.0.
2989
2990 def eof(self, context):
2991 """Transition marker at end of section or document."""
2992 marker = context[0].strip()
2993 if len(marker) < 4:
2994 self.state_correction(context)
2995 src, srcline = self.state_machine.get_source_and_line()
2996 # lineno = self.state_machine.abs_line_number() - 1
2997 transition = nodes.transition(rawsource=context[0])
2998 transition.source = src
2999 transition.line = srcline - 1
3000 # transition.line = lineno
3001 self.parent += transition
3002 return []
3003
3004 def blank(self, match, context, next_state):
3005 """Transition marker."""
3006 src, srcline = self.state_machine.get_source_and_line()
3007 marker = context[0].strip()
3008 if len(marker) < 4:
3009 self.state_correction(context)
3010 transition = nodes.transition(rawsource=marker)
3011 transition.source = src
3012 transition.line = srcline - 1
3013 self.parent += transition
3014 return [], 'Body', []
3015
3016 def text(self, match, context, next_state):
3017 """Potential over- & underlined title."""
3018 lineno = self.state_machine.abs_line_number() - 1
3019 overline = context[0]
3020 title = match.string
3021 underline = ''
3022 try:
3023 underline = self.state_machine.next_line()
3024 except EOFError:
3025 blocktext = overline + '\n' + title
3026 if len(overline.rstrip()) < 4:
3027 self.short_overline(context, blocktext, lineno, 2)
3028 else:
3029 msg = self.reporter.error(
3030 'Incomplete section title.',
3031 nodes.literal_block(blocktext, blocktext),
3032 line=lineno)
3033 self.parent += msg
3034 return [], 'Body', []
3035 source = '%s\n%s\n%s' % (overline, title, underline)
3036 overline = overline.rstrip()
3037 underline = underline.rstrip()
3038 if not self.transitions['underline'][0].match(underline):
3039 blocktext = overline + '\n' + title + '\n' + underline
3040 if len(overline.rstrip()) < 4:
3041 self.short_overline(context, blocktext, lineno, 2)
3042 else:
3043 msg = self.reporter.error(
3044 'Missing matching underline for section title overline.',
3045 nodes.literal_block(source, source),
3046 line=lineno)
3047 self.parent += msg
3048 return [], 'Body', []
3049 elif overline != underline:
3050 blocktext = overline + '\n' + title + '\n' + underline
3051 if len(overline.rstrip()) < 4:
3052 self.short_overline(context, blocktext, lineno, 2)
3053 else:
3054 msg = self.reporter.error(
3055 'Title overline & underline mismatch.',
3056 nodes.literal_block(source, source),
3057 line=lineno)
3058 self.parent += msg
3059 return [], 'Body', []
3060 title = title.rstrip()
3061 messages = []
3062 if column_width(title) > len(overline):
3063 blocktext = overline + '\n' + title + '\n' + underline
3064 if len(overline.rstrip()) < 4:
3065 self.short_overline(context, blocktext, lineno, 2)
3066 else:
3067 msg = self.reporter.warning(
3068 'Title overline too short.',
3069 nodes.literal_block(source, source),
3070 line=lineno)
3071 messages.append(msg)
3072 style = (overline[0], underline[0])
3073 self.section(title.lstrip(), source, style, lineno + 1, messages)
3074 return [], 'Body', []
3075
3076 indent = text # indented title
3077
3078 def underline(self, match, context, next_state):
3079 overline = context[0]
3080 blocktext = overline + '\n' + self.state_machine.line
3081 lineno = self.state_machine.abs_line_number() - 1
3082 if len(overline.rstrip()) < 4:
3083 self.short_overline(context, blocktext, lineno, 1)
3084 msg = self.reporter.error(
3085 'Invalid section title or transition marker.',
3086 nodes.literal_block(blocktext, blocktext),
3087 line=lineno)
3088 self.parent += msg
3089 return [], 'Body', []
3090
3091 def short_overline(self, context, blocktext, lineno, lines=1) -> None:
3092 msg = self.reporter.info(
3093 'Possible incomplete section title.\nTreating the overline as '
3094 "ordinary text because it's so short.",
3095 line=lineno)
3096 self.parent += msg
3097 self.state_correction(context, lines)
3098
3099 def state_correction(self, context, lines=1):
3100 self.state_machine.previous_line(lines)
3101 context[:] = []
3102 raise statemachine.StateCorrection('Body', 'text')
3103
3104
3105class QuotedLiteralBlock(RSTState):
3106
3107 """
3108 Nested parse handler for quoted (unindented) literal blocks.
3109
3110 Special-purpose. Not for inclusion in `state_classes`.
3111 """
3112
3113 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
3114 'text': r''}
3115 initial_transitions = ('initial_quoted', 'text')
3116
3117 def __init__(self, state_machine, debug=False) -> None:
3118 RSTState.__init__(self, state_machine, debug)
3119 self.messages = []
3120 self.initial_lineno = None
3121
3122 def blank(self, match, context, next_state):
3123 if context:
3124 raise EOFError
3125 else:
3126 return context, next_state, []
3127
3128 def eof(self, context):
3129 if context:
3130 src, srcline = self.state_machine.get_source_and_line(
3131 self.initial_lineno)
3132 text = '\n'.join(context)
3133 literal_block = nodes.literal_block(text, text)
3134 literal_block.source = src
3135 literal_block.line = srcline
3136 self.parent += literal_block
3137 else:
3138 self.parent += self.reporter.warning(
3139 'Literal block expected; none found.',
3140 line=self.state_machine.abs_line_number()
3141 ) # src not available, statemachine.input_lines is empty
3142 self.state_machine.previous_line()
3143 self.parent += self.messages
3144 return []
3145
3146 def indent(self, match, context, next_state):
3147 assert context, ('QuotedLiteralBlock.indent: context should not '
3148 'be empty!')
3149 self.messages.append(
3150 self.reporter.error('Unexpected indentation.',
3151 line=self.state_machine.abs_line_number()))
3152 self.state_machine.previous_line()
3153 raise EOFError
3154
3155 def initial_quoted(self, match, context, next_state):
3156 """Match arbitrary quote character on the first line only."""
3157 self.remove_transition('initial_quoted')
3158 quote = match.string[0]
3159 pattern = re.compile(re.escape(quote))
3160 # New transition matches consistent quotes only:
3161 self.add_transition('quoted',
3162 (pattern, self.quoted, self.__class__.__name__))
3163 self.initial_lineno = self.state_machine.abs_line_number()
3164 return [match.string], next_state, []
3165
3166 def quoted(self, match, context, next_state):
3167 """Match consistent quotes on subsequent lines."""
3168 context.append(match.string)
3169 return context, next_state, []
3170
3171 def text(self, match, context, next_state):
3172 if context:
3173 self.messages.append(
3174 self.reporter.error('Inconsistent literal block quoting.',
3175 line=self.state_machine.abs_line_number()))
3176 self.state_machine.previous_line()
3177 raise EOFError
3178
3179
3180state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
3181 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
3182 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
3183"""Standard set of State classes used to start `RSTStateMachine`."""