1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
6This is the ``docutils.parsers.rst.states`` module, the core of
7the reStructuredText parser. It defines the following:
8
9:Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: obsolete, use `types.SimpleNamespace`.
30
31:Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
35
36:Functions:
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
39
40:Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
42
43Parser Overview
44===============
45
46The reStructuredText parser is implemented as a recursive state machine,
47examining its input one line at a time. To understand how the parser works,
48please first become familiar with the `docutils.statemachine` module. In the
49description below, references are made to classes defined in this module;
50please see the individual classes for details.
51
52Parsing proceeds as follows:
53
541. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
58
592. The method associated with the matched transition pattern is called.
60
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
65
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
70
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
73
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
83
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
86
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
90
      - Indented: The element is a definition list item, and parsing proceeds
        similarly to step 2.B, using the `Definition` state.
93
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
97
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
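
For example, a minimal end-to-end run of this parser through the public
`docutils.parsers.rst.Parser` front end might look as follows (a sketch for
orientation only; `get_default_settings` and `new_document` come from
`docutils.frontend` and `docutils.utils`, not from this module)::

    from docutils.frontend import get_default_settings
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    parser = Parser()
    settings = get_default_settings(Parser)
    document = new_document('<sketch>', settings)
    parser.parse('A *simple* test paragraph.', document)
    # `document` is now a populated `nodes.document` tree.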
101"""
102
103from __future__ import annotations
104
105__docformat__ = 'reStructuredText'
106
107import copy
108import re
109from types import FunctionType, MethodType
110from types import SimpleNamespace as Struct
111import warnings
112
113from docutils import nodes, statemachine, utils
114from docutils import ApplicationError, DataError
115from docutils.statemachine import StateMachineWS, StateWS
116from docutils.nodes import fully_normalize_name as normalize_name
117from docutils.nodes import unescape, whitespace_normalize_name
118import docutils.parsers.rst
119from docutils.parsers.rst import directives, languages, tableparser, roles
120from docutils.utils import escape2null, column_width
121from docutils.utils import punctuation_chars, urischemes
122from docutils.utils import split_escaped_whitespace
123from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
124 RomanNumeral)
125
126TYPE_CHECKING = False
127if TYPE_CHECKING:
128 from docutils.statemachine import StringList
129
130
131class MarkupError(DataError): pass
132class UnknownInterpretedRoleError(DataError): pass
133class InterpretedRoleNotImplementedError(DataError): pass
134class ParserError(ApplicationError): pass
135class MarkupMismatch(Exception): pass
136
137
138class RSTStateMachine(StateMachineWS):
139
140 """
141 reStructuredText's master StateMachine.
142
143 The entry point to reStructuredText parsing is the `run()` method.
144 """
145
146 def run(self, input_lines, document, input_offset=0, match_titles=True,
147 inliner=None) -> None:
148 """
149 Parse `input_lines` and modify the `document` node in place.
150
151 Extend `StateMachineWS.run()`: set up parse-global data and
152 run the StateMachine.
153 """
154 self.language = languages.get_language(
155 document.settings.language_code, document.reporter)
156 self.match_titles = match_titles
157 if inliner is None:
158 inliner = Inliner()
159 inliner.init_customizations(document.settings)
160 # A collection of objects to share with nested parsers.
161 # The attributes `reporter`, `section_level`, and
162 # `section_bubble_up_kludge` will be removed in Docutils 2.0
163 self.memo = Struct(document=document,
164 reporter=document.reporter, # ignored
165 language=self.language,
166 title_styles=[],
167 section_level=0, # ignored
168 section_bubble_up_kludge=False, # ignored
169 inliner=inliner)
170 self.document = document
171 self.attach_observer(document.note_source)
172 self.reporter = self.document.reporter
173 self.node = document
174 results = StateMachineWS.run(self, input_lines, input_offset,
175 input_source=document['source'])
176 assert results == [], 'RSTStateMachine.run() results should be empty!'
177 self.node = self.memo = None # remove unneeded references
178
179
180class NestedStateMachine(StateMachineWS):
181 """
182 StateMachine run from within other StateMachine runs, to parse nested
183 document structures.
184 """
185
186 def run(self, input_lines, input_offset, memo, node, match_titles=True):
187 """
188 Parse `input_lines` and populate `node`.
189
190 Use a separate "title style hierarchy" (changed in Docutils 0.23).
191
192 Extend `StateMachineWS.run()`: set up document-wide data.
193 """
194 self.match_titles = match_titles
195 self.memo = copy.copy(memo)
196 self.document = memo.document
197 self.attach_observer(self.document.note_source)
198 self.language = memo.language
199 self.reporter = self.document.reporter
200 self.node = node
201 if match_titles:
202 # Use a separate section title style hierarchy;
203 # ensure all sections in the `input_lines` are treated as
204 # subsections of the current section by blocking lower
205 # section levels with a style that is impossible in rST:
206 self.memo.title_styles = ['x'] * len(node.section_hierarchy())
207 results = StateMachineWS.run(self, input_lines, input_offset)
208 assert results == [], ('NestedStateMachine.run() results should be '
209 'empty!')
210 return results
211
212
213class RSTState(StateWS):
214
215 """
216 reStructuredText State superclass.
217
218 Contains methods used by all State subclasses.
219 """
220
221 nested_sm = NestedStateMachine
222 nested_sm_cache = []
223
224 def __init__(self, state_machine, debug=False) -> None:
225 self.nested_sm_kwargs = {'state_classes': state_classes,
226 'initial_state': 'Body'}
227 StateWS.__init__(self, state_machine, debug)
228
229 def runtime_init(self) -> None:
230 StateWS.runtime_init(self)
231 memo = self.state_machine.memo
232 self.memo = memo
233 self.document = memo.document
234 self.inliner = memo.inliner
235 self.reporter = self.document.reporter
236 # enable the reporter to determine source and source-line
237 if not hasattr(self.reporter, 'get_source_and_line'):
238 self.reporter.get_source_and_line = self.state_machine.get_source_and_line # noqa:E501
239
240 @property
241 def parent(self) -> nodes.Element | None:
242 return self.state_machine.node
243
244 @parent.setter
245 def parent(self, value: nodes.Element):
246 self.state_machine.node = value
247
248 def goto_line(self, abs_line_offset) -> None:
249 """
250 Jump to input line `abs_line_offset`, ignoring jumps past the end.
251 """
252 try:
253 self.state_machine.goto_line(abs_line_offset)
254 except EOFError:
255 pass
256
257 def no_match(self, context, transitions):
258 """
259 Override `StateWS.no_match` to generate a system message.
260
261 This code should never be run.
262 """
263 self.reporter.severe(
264 'Internal error: no transition pattern match. State: "%s"; '
265 'transitions: %s; context: %s; current line: %r.'
266 % (self.__class__.__name__, transitions, context,
267 self.state_machine.line))
268 return context, None, []
269
270 def bof(self, context):
271 """Called at beginning of file."""
272 return [], []
273
274 def nested_parse(self,
275 block: StringList,
276 input_offset: int,
277 node: nodes.Element,
278 match_titles: bool = False,
279 state_machine_class: StateMachineWS|None = None,
280 state_machine_kwargs: dict|None = None
281 ) -> int:
282 """
283 Parse the input `block` with a nested state-machine rooted at `node`.
284
285 :block:
286 reStructuredText source extract.
287 :input_offset:
288 Line number at start of the block.
289 :node:
290 Base node. All generated nodes will be appended to this node.
291 :match_titles:
292 Allow section titles?
293 A separate section title style hierarchy is used for the nested
294 parsing (all sections are subsections of the current section).
295 The calling code should check whether sections are valid
296 children of the base node and move them or warn otherwise.
297 :state_machine_class:
298 Default: `NestedStateMachine`.
299 :state_machine_kwargs:
300 Keyword arguments for the state-machine instantiation.
301 Default: `self.nested_sm_kwargs`.
302
303 Create a new state-machine instance if required.
304 Return new offset.
305 """
306 use_default = 0
307 if state_machine_class is None:
308 state_machine_class = self.nested_sm
309 use_default += 1
310 if state_machine_kwargs is None:
311 state_machine_kwargs = self.nested_sm_kwargs
312 use_default += 1
313 my_state_machine = None
314 if use_default == 2:
315 try:
316 # get cached state machine, prevent others from using it
317 my_state_machine = self.nested_sm_cache.pop()
318 except IndexError:
319 pass
320 if not my_state_machine:
321 my_state_machine = state_machine_class(
322 debug=self.debug,
323 parent_state_machine=self.state_machine,
324 **state_machine_kwargs)
325 # run the state machine and populate `node`:
326 block_length = len(block)
327 my_state_machine.run(block, input_offset, memo=self.memo,
328 node=node, match_titles=match_titles)
329 # clean up
330 new_offset = my_state_machine.abs_line_offset()
331 if use_default == 2:
332 self.nested_sm_cache.append(my_state_machine)
333 else:
334 my_state_machine.unlink()
335 # No `block.parent` implies disconnected -- lines aren't in sync:
336 if block.parent and (len(block) - block_length) != 0:
337 # Adjustment for block if modified in nested parse:
338 self.state_machine.next_line(len(block) - block_length)
339 return new_offset
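
    # A hedged usage sketch (not exercised at runtime): directive code
    # commonly re-enters the parser through this method, e.g.
    #
    #     container = nodes.Element()
    #     self.state.nested_parse(self.content, self.content_offset,
    #                             container)
    #
    # where `self.content` is the StringList and `self.content_offset` the
    # input offset handed to a `Directive.run()` implementation.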
340
341 def nested_list_parse(self, block, input_offset, node, initial_state,
342 blank_finish,
343 blank_finish_state=None,
344 extra_settings={},
345 match_titles=False, # deprecated, will be removed
346 state_machine_class=None,
347 state_machine_kwargs=None):
348 """
349 Parse the input `block` with a nested state-machine rooted at `node`.
350
351 Create a new StateMachine rooted at `node` and run it over the
352 input `block` (see also `nested_parse()`).
353 Also keep track of optional intermediate blank lines and the
354 required final one.
355
356 Return new offset and a boolean indicating whether there was a
357 blank final line.
358 """
359 if match_titles:
360 warnings.warn('The "match_titles" argument of '
361 'parsers.rst.states.RSTState.nested_list_parse() '
362 'will be ignored in Docutils 1.0 '
363 'and removed in Docutils 2.0.',
364 PendingDeprecationWarning, stacklevel=2)
365 if state_machine_class is None:
366 state_machine_class = self.nested_sm
367 if state_machine_kwargs is None:
368 state_machine_kwargs = self.nested_sm_kwargs.copy()
369 state_machine_kwargs['initial_state'] = initial_state
370 my_state_machine = state_machine_class(
371 debug=self.debug,
372 parent_state_machine=self.state_machine,
373 **state_machine_kwargs)
374 if blank_finish_state is None:
375 blank_finish_state = initial_state
376 my_state_machine.states[blank_finish_state].blank_finish = blank_finish
377 for key, value in extra_settings.items():
378 setattr(my_state_machine.states[initial_state], key, value)
379 my_state_machine.run(block, input_offset, memo=self.memo,
380 node=node, match_titles=match_titles)
381 blank_finish = my_state_machine.states[blank_finish_state].blank_finish
382 my_state_machine.unlink()
383 return my_state_machine.abs_line_offset(), blank_finish
384
385 def section(self, title, source, style, lineno, messages) -> None:
386 """Check for a valid subsection and create one if it checks out."""
387 if self.check_subsection(source, style, lineno):
388 self.new_subsection(title, lineno, messages)
389
390 def check_subsection(self, source, style, lineno) -> bool:
391 """
392 Check for a valid subsection header. Update section data in `memo`.
393
394 When a new section is reached that isn't a subsection of the current
395 section, set `self.parent` to the new section's parent section
396 (or the root node if the new section is a top-level section).
397 """
398 title_styles = self.memo.title_styles
399 parent_sections = self.parent.section_hierarchy()
400 # current section level: (0 root, 1 section, 2 subsection, ...)
401 oldlevel = len(parent_sections)
402 # new section level:
403 try: # check for existing title style
404 newlevel = title_styles.index(style) + 1
405 except ValueError: # new title style
406 newlevel = len(title_styles) + 1
407 # The new level must not be deeper than an immediate child
408 # of the current level:
409 if newlevel > oldlevel + 1:
410 styles = ' '.join('/'.join(style) for style in title_styles)
411 self.parent += self.reporter.error(
412 'Inconsistent title style:'
413 f' skip from level {oldlevel} to {newlevel}.',
414 nodes.literal_block('', source),
415 nodes.paragraph('', f'Established title styles: {styles}'),
416 line=lineno)
417 return False
418 # Update parent state:
419 if newlevel > len(title_styles):
420 title_styles.append(style)
421 self.memo.section_level = newlevel
422 if newlevel <= oldlevel:
423 # new section is sibling or higher up in the section hierarchy
424 self.parent = parent_sections[newlevel-1].parent
425 return True
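
    # Illustration (a sketch, not executed): with established title styles
    # ['=', '-'], an underline style '-' yields level 2; a previously unseen
    # style '~' becomes level 3 and is accepted only while the current level
    # is at least 2, otherwise the "Inconsistent title style" error above is
    # reported.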
426
427 def title_inconsistent(self, sourcetext, lineno):
428 # Ignored. Will be removed in Docutils 2.0.
429 error = self.reporter.error(
430 'Title level inconsistent:', nodes.literal_block('', sourcetext),
431 line=lineno)
432 return error
433
434 def new_subsection(self, title, lineno, messages):
435 """Append new subsection to document tree."""
436 section_node = nodes.section()
437 self.parent += section_node
438 textnodes, title_messages = self.inline_text(title, lineno)
439 titlenode = nodes.title(title, '', *textnodes)
440 name = normalize_name(titlenode.astext())
441 section_node['names'].append(name)
442 section_node += titlenode
443 section_node += messages
444 section_node += title_messages
445 self.document.note_implicit_target(section_node, section_node)
446 # Update state:
447 self.parent = section_node
448
449 def paragraph(self, lines, lineno):
450 """
451 Return a list (paragraph & messages) & a boolean: literal_block next?
452 """
453 data = '\n'.join(lines).rstrip()
454 if re.search(r'(?<!\\)(\\\\)*::$', data):
455 if len(data) == 2:
456 return [], 1
457 elif data[-3] in ' \n':
458 text = data[:-3].rstrip()
459 else:
460 text = data[:-1]
461 literalnext = 1
462 else:
463 text = data
464 literalnext = 0
465 textnodes, messages = self.inline_text(text, lineno)
466 p = nodes.paragraph(data, '', *textnodes)
467 p.source, p.line = self.state_machine.get_source_and_line(lineno)
468 return [p] + messages, literalnext
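
    # Examples of the trailing '::' handling above (a sketch, not run):
    #   'Literal block::'  ->  text 'Literal block:', literal block follows
    #   'Paragraph ::'     ->  text 'Paragraph',      literal block follows
    #   '::'               ->  no paragraph,          literal block follows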
469
470 def inline_text(self, text, lineno):
471 """
472 Return 2 lists: nodes (text and inline elements), and system_messages.
473 """
474 nodes, messages = self.inliner.parse(text, lineno,
475 self.memo, self.parent)
476 return nodes, messages
477
478 def unindent_warning(self, node_name):
479 # the actual problem is one line below the current line
480 lineno = self.state_machine.abs_line_number() + 1
481 return self.reporter.warning('%s ends without a blank line; '
482 'unexpected unindent.' % node_name,
483 line=lineno)
484
485
486def build_regexp(definition, compile_patterns=True):
487 """
488 Build, compile and return a regular expression based on `definition`.
489
490 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
491 where "parts" is a list of regular expressions and/or regular
492 expression definitions to be joined into an or-group.
493 """
494 name, prefix, suffix, parts = definition
495 part_strings = []
496 for part in parts:
497 if isinstance(part, tuple):
498 part_strings.append(build_regexp(part, None))
499 else:
500 part_strings.append(part)
501 or_group = '|'.join(part_strings)
502 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
503 if compile_patterns:
504 return re.compile(regexp)
505 else:
506 return regexp
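
# A hedged illustration (not used at runtime): the nested definition
#     ('initial', '', '', [r'\*\*', ('nested', '<', '>', ['a', 'b'])])
# builds the pattern
#     (?P<initial>\*\*|<(?P<nested>a|b)>)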
507
508
509class Inliner:
510
511 """
512 Parse inline markup; call the `parse()` method.
513 """
514
515 def __init__(self) -> None:
516 self.implicit_dispatch = []
517 """List of (pattern, bound method) tuples, used by
518 `self.implicit_inline`."""
519
520 def init_customizations(self, settings) -> None:
521 # lookahead and look-behind expressions for inline markup rules
522 if getattr(settings, 'character_level_inline_markup', False):
523 start_string_prefix = '(^|(?<!\x00))'
524 end_string_suffix = ''
525 else:
526 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
527 (punctuation_chars.openers,
528 punctuation_chars.delimiters))
529 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
530 (punctuation_chars.closing_delimiters,
531 punctuation_chars.delimiters,
532 punctuation_chars.closers))
533 args = locals().copy()
534 args.update(vars(self.__class__))
535
536 parts = ('initial_inline', start_string_prefix, '',
537 [
538 ('start', '', self.non_whitespace_after, # simple start-strings
539 [r'\*\*', # strong
540 r'\*(?!\*)', # emphasis but not strong
541 r'``', # literal
542 r'_`', # inline internal target
543 r'\|(?!\|)'] # substitution reference
544 ),
545 ('whole', '', end_string_suffix, # whole constructs
546 [ # reference name & end-string
547 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
548 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
549 [r'[0-9]+', # manually numbered
550 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)
551 r'\*', # auto-symbol
552 r'(?P<citationlabel>%s)' % self.simplename, # citation ref
553 ]
554 )
555 ]
556 ),
557 ('backquote', # interpreted text or phrase reference
558 '(?P<role>(:%s:)?)' % self.simplename, # optional role
559 self.non_whitespace_after,
560 ['`(?!`)'] # but not literal
561 )
562 ]
563 )
564 self.start_string_prefix = start_string_prefix
565 self.end_string_suffix = end_string_suffix
566 self.parts = parts
567
568 self.patterns = Struct(
569 initial=build_regexp(parts),
570 emphasis=re.compile(self.non_whitespace_escape_before
571 + r'(\*)' + end_string_suffix),
572 strong=re.compile(self.non_whitespace_escape_before
573 + r'(\*\*)' + end_string_suffix),
574 interpreted_or_phrase_ref=re.compile(
575 r"""
576 %(non_unescaped_whitespace_escape_before)s
577 (
578 `
579 (?P<suffix>
580 (?P<role>:%(simplename)s:)?
581 (?P<refend>__?)?
582 )
583 )
584 %(end_string_suffix)s
585 """ % args, re.VERBOSE),
586 embedded_link=re.compile(
587 r"""
588 (
589 (?:[ \n]+|^) # spaces or beginning of line/string
590 < # open bracket
591 %(non_whitespace_after)s
592 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
593 %(non_whitespace_escape_before)s
594 > # close bracket
595 )
596 $ # end of string
597 """ % args, re.VERBOSE),
598 literal=re.compile(self.non_whitespace_before + '(``)'
599 + end_string_suffix),
600 target=re.compile(self.non_whitespace_escape_before
601 + r'(`)' + end_string_suffix),
602 substitution_ref=re.compile(self.non_whitespace_escape_before
603 + r'(\|_{0,2})'
604 + end_string_suffix),
605 email=re.compile(self.email_pattern % args + '$',
606 re.VERBOSE),
607 uri=re.compile(
608 (r"""
609 %(start_string_prefix)s
610 (?P<whole>
611 (?P<absolute> # absolute URI
612 (?P<scheme> # scheme (http, ftp, mailto)
613 [a-zA-Z][a-zA-Z0-9.+-]*
614 )
615 :
616 (
617 ( # either:
618 (//?)? # hierarchical URI
619 %(uric)s* # URI characters
620 %(uri_end)s # final URI char
621 )
622 ( # optional query
623 \?%(uric)s*
624 %(uri_end)s
625 )?
626 ( # optional fragment
627 \#%(uric)s*
628 %(uri_end)s
629 )?
630 )
631 )
632 | # *OR*
633 (?P<email> # email address
634 """ + self.email_pattern + r"""
635 )
636 )
637 %(end_string_suffix)s
638 """) % args, re.VERBOSE),
639 pep=re.compile(
640 r"""
641 %(start_string_prefix)s
642 (
643 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
644 |
645 (PEP\s+(?P<pepnum2>\d+)) # reference by name
646 )
647 %(end_string_suffix)s""" % args, re.VERBOSE),
648 rfc=re.compile(
649 r"""
650 %(start_string_prefix)s
651 (RFC(-|\s+)?(?P<rfcnum>\d+))
652 %(end_string_suffix)s""" % args, re.VERBOSE))
653
654 self.implicit_dispatch.append((self.patterns.uri,
655 self.standalone_uri))
656 if settings.pep_references:
657 self.implicit_dispatch.append((self.patterns.pep,
658 self.pep_reference))
659 if settings.rfc_references:
660 self.implicit_dispatch.append((self.patterns.rfc,
661 self.rfc_reference))
662
663 def parse(self, text, lineno, memo, parent):
664 # Needs to be refactored for nested inline markup.
665 # Add nested_parse() method?
666 """
667 Return 2 lists: nodes (text and inline elements), and system_messages.
668
669 Using `self.patterns.initial`, a pattern which matches start-strings
670 (emphasis, strong, interpreted, phrase reference, literal,
671 substitution reference, and inline target) and complete constructs
672 (simple reference, footnote reference), search for a candidate. When
673 one is found, check for validity (e.g., not a quoted '*' character).
674 If valid, search for the corresponding end string if applicable, and
675 check it for validity. If not found or invalid, generate a warning
676 and ignore the start-string. Implicit inline markup (e.g. standalone
677 URIs) is found last.
678
679 :text: source string
680 :lineno: absolute line number, cf. `statemachine.get_source_and_line()`
681 """
682 self.document = memo.document
683 self.language = memo.language
684 self.reporter = self.document.reporter
685 self.parent = parent
686 pattern_search = self.patterns.initial.search
687 dispatch = self.dispatch
688 remaining = escape2null(text)
689 processed = []
690 unprocessed = []
691 messages = []
692 while remaining:
693 match = pattern_search(remaining)
694 if match:
695 groups = match.groupdict()
696 method = dispatch[groups['start'] or groups['backquote']
697 or groups['refend'] or groups['fnend']]
698 before, inlines, remaining, sysmessages = method(self, match,
699 lineno)
700 unprocessed.append(before)
701 messages += sysmessages
702 if inlines:
703 processed += self.implicit_inline(''.join(unprocessed),
704 lineno)
705 processed += inlines
706 unprocessed = []
707 else:
708 break
709 remaining = ''.join(unprocessed) + remaining
710 if remaining:
711 processed += self.implicit_inline(remaining, lineno)
712 return processed, messages
713
714 # Inline object recognition
715 # -------------------------
716 # See also init_customizations().
717 non_whitespace_before = r'(?<!\s)'
718 non_whitespace_escape_before = r'(?<![\s\x00])'
719 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
720 non_whitespace_after = r'(?!\s)'
721 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
722 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
723 # Valid URI characters (see RFC 2396 & RFC 2732);
724 # final \x00 allows backslash escapes in URIs:
725 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
726 # Delimiter indicating the end of a URI (not part of the URI):
727 uri_end_delim = r"""[>]"""
728 # Last URI character; same as uric but no punctuation:
729 urilast = r"""[_~*/=+a-zA-Z0-9]"""
730 # End of a URI (either 'urilast' or 'uric followed by a
731 # uri_end_delim'):
732 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
733 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
734 email_pattern = r"""
735 %(emailc)s+(?:\.%(emailc)s+)* # name
736 (?<!\x00)@ # at
737 %(emailc)s+(?:\.%(emailc)s*)* # host
738 %(uri_end)s # final URI char
739 """
740
741 def quoted_start(self, match):
742 """Test if inline markup start-string is 'quoted'.
743
744 'Quoted' in this context means the start-string is enclosed in a pair
745 of matching opening/closing delimiters (not necessarily quotes)
746 or at the end of the match.
747 """
748 string = match.string
749 start = match.start()
750 if start == 0: # start-string at beginning of text
751 return False
752 prestart = string[start - 1]
753 try:
754 poststart = string[match.end()]
755 except IndexError: # start-string at end of text
756 return True # not "quoted" but no markup start-string either
757 return punctuation_chars.match_chars(prestart, poststart)
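
    # For example (a sketch, not executed): in the text  "*"  the asterisk
    # start-string is enclosed in a matching pair of quote characters, so it
    # is "quoted" and does not open emphasis.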
758
759 def inline_obj(self, match, lineno, end_pattern, nodeclass,
760 restore_backslashes=False):
761 string = match.string
762 matchstart = match.start('start')
763 matchend = match.end('start')
764 if self.quoted_start(match):
765 return string[:matchend], [], string[matchend:], [], ''
766 endmatch = end_pattern.search(string[matchend:])
767 if endmatch and endmatch.start(1): # 1 or more chars
768 text = endmatch.string[:endmatch.start(1)]
769 if restore_backslashes:
770 text = unescape(text, True)
771 textend = matchend + endmatch.end(1)
772 rawsource = unescape(string[matchstart:textend], True)
773 node = nodeclass(rawsource, text)
774 return (string[:matchstart], [node],
775 string[textend:], [], endmatch.group(1))
776 msg = self.reporter.warning(
777 'Inline %s start-string without end-string.'
778 % nodeclass.__name__, line=lineno)
779 text = unescape(string[matchstart:matchend], True)
780 prb = self.problematic(text, text, msg)
781 return string[:matchstart], [prb], string[matchend:], [msg], ''
782
783 def problematic(self, text, rawsource, message):
784 msgid = self.document.set_id(message, self.parent)
785 problematic = nodes.problematic(rawsource, text, refid=msgid)
786 prbid = self.document.set_id(problematic)
787 message.add_backref(prbid)
788 return problematic
789
790 def emphasis(self, match, lineno):
791 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
792 match, lineno, self.patterns.emphasis, nodes.emphasis)
793 return before, inlines, remaining, sysmessages
794
795 def strong(self, match, lineno):
796 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
797 match, lineno, self.patterns.strong, nodes.strong)
798 return before, inlines, remaining, sysmessages
799
800 def interpreted_or_phrase_ref(self, match, lineno):
801 end_pattern = self.patterns.interpreted_or_phrase_ref
802 string = match.string
803 matchstart = match.start('backquote')
804 matchend = match.end('backquote')
805 rolestart = match.start('role')
806 role = match.group('role')
807 position = ''
808 if role:
809 role = role[1:-1]
810 position = 'prefix'
811 elif self.quoted_start(match):
812 return string[:matchend], [], string[matchend:], []
813 endmatch = end_pattern.search(string[matchend:])
814 if endmatch and endmatch.start(1): # 1 or more chars
815 textend = matchend + endmatch.end()
816 if endmatch.group('role'):
817 if role:
818 msg = self.reporter.warning(
819 'Multiple roles in interpreted text (both '
820 'prefix and suffix present; only one allowed).',
821 line=lineno)
822 text = unescape(string[rolestart:textend], True)
823 prb = self.problematic(text, text, msg)
824 return string[:rolestart], [prb], string[textend:], [msg]
825 role = endmatch.group('suffix')[1:-1]
826 position = 'suffix'
827 escaped = endmatch.string[:endmatch.start(1)]
828 rawsource = unescape(string[matchstart:textend], True)
829 if rawsource[-1:] == '_':
830 if role:
831 msg = self.reporter.warning(
832 'Mismatch: both interpreted text role %s and '
833 'reference suffix.' % position, line=lineno)
834 text = unescape(string[rolestart:textend], True)
835 prb = self.problematic(text, text, msg)
836 return string[:rolestart], [prb], string[textend:], [msg]
837 return self.phrase_ref(string[:matchstart], string[textend:],
838 rawsource, escaped)
839 else:
840 rawsource = unescape(string[rolestart:textend], True)
841 nodelist, messages = self.interpreted(rawsource, escaped, role,
842 lineno)
843 return (string[:rolestart], nodelist,
844 string[textend:], messages)
845 msg = self.reporter.warning(
846 'Inline interpreted text or phrase reference start-string '
847 'without end-string.', line=lineno)
848 text = unescape(string[matchstart:matchend], True)
849 prb = self.problematic(text, text, msg)
850 return string[:matchstart], [prb], string[matchend:], [msg]
851
852 def phrase_ref(self, before, after, rawsource, escaped, text=None):
853 # `text` is ignored (since 0.16)
854 match = self.patterns.embedded_link.search(escaped)
855 if match: # embedded <URI> or <alias_>
856 text = escaped[:match.start(0)]
857 unescaped = unescape(text)
858 rawtext = unescape(text, True)
859 aliastext = match.group(2)
860 rawaliastext = unescape(aliastext, True)
861 underscore_escaped = rawaliastext.endswith(r'\_')
862 if (aliastext.endswith('_')
863 and not (underscore_escaped
864 or self.patterns.uri.match(aliastext))):
865 aliastype = 'name'
866 alias = normalize_name(unescape(aliastext[:-1]))
867 target = nodes.target(match.group(1), refname=alias)
868 target.indirect_reference_name = whitespace_normalize_name(
869 unescape(aliastext[:-1]))
870 else:
871 aliastype = 'uri'
872 # remove unescaped whitespace
873 alias_parts = split_escaped_whitespace(match.group(2))
874 alias = ' '.join(''.join(part.split())
875 for part in alias_parts)
876 alias = self.adjust_uri(unescape(alias))
877 if alias.endswith(r'\_'):
878 alias = alias[:-2] + '_'
879 target = nodes.target(match.group(1), refuri=alias)
880 target.referenced = 1
881 if not aliastext:
882 raise ApplicationError('problem with embedded link: %r'
883 % aliastext)
884 if not text:
885 text = alias
886 unescaped = unescape(text)
887 rawtext = rawaliastext
888 else:
889 text = escaped
890 unescaped = unescape(text)
891 target = None
892 rawtext = unescape(escaped, True)
893
894 refname = normalize_name(unescaped)
895 reference = nodes.reference(rawsource, text,
896 name=whitespace_normalize_name(unescaped))
897 reference[0].rawsource = rawtext
898
899 node_list = [reference]
900
901 if rawsource[-2:] == '__':
902 if target and (aliastype == 'name'):
903 reference['refname'] = alias
904 self.document.note_refname(reference)
905 # self.document.note_indirect_target(target) # required?
906 elif target and (aliastype == 'uri'):
907 reference['refuri'] = alias
908 else:
909 reference['anonymous'] = True
910 else:
911 if target:
912 target['names'].append(refname)
913 if aliastype == 'name':
914 reference['refname'] = alias
915 self.document.note_indirect_target(target)
916 self.document.note_refname(reference)
917 else:
918 reference['refuri'] = alias
919 # target.note_referenced_by(name=refname)
920 self.document.note_implicit_target(target, self.parent)
921 node_list.append(target)
922 else:
923 reference['refname'] = refname
924 self.document.note_refname(reference)
925 return before, node_list, after, []
926
927 def adjust_uri(self, uri):
928 match = self.patterns.email.match(uri)
929 if match:
930 return 'mailto:' + uri
931 else:
932 return uri
933
934 def interpreted(self, rawsource, text, role, lineno):
935 role_fn, messages = roles.role(role, self.language, lineno,
936 self.reporter)
937 if role_fn:
938 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
939 return nodes, messages + messages2
940 else:
941 msg = self.reporter.error(
942 'Unknown interpreted text role "%s".' % role,
943 line=lineno)
944 return ([self.problematic(rawsource, rawsource, msg)],
945 messages + [msg])
946
947 def literal(self, match, lineno):
948 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
949 match, lineno, self.patterns.literal, nodes.literal,
950 restore_backslashes=True)
951 return before, inlines, remaining, sysmessages
952
953 def inline_internal_target(self, match, lineno):
954 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
955 match, lineno, self.patterns.target, nodes.target)
956 if inlines and isinstance(inlines[0], nodes.target):
957 assert len(inlines) == 1
958 target = inlines[0]
959 name = normalize_name(target.astext())
960 target['names'].append(name)
961 self.document.note_explicit_target(target, self.parent)
962 return before, inlines, remaining, sysmessages
963
964 def substitution_reference(self, match, lineno):
965 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
966 match, lineno, self.patterns.substitution_ref,
967 nodes.substitution_reference)
968 if len(inlines) == 1:
969 subref_node = inlines[0]
970 if isinstance(subref_node, nodes.substitution_reference):
971 subref_text = subref_node.astext()
972 self.document.note_substitution_ref(subref_node, subref_text)
973 if endstring[-1:] == '_':
974 reference_node = nodes.reference(
975 '|%s%s' % (subref_text, endstring), '')
976 if endstring[-2:] == '__':
977 reference_node['anonymous'] = True
978 else:
979 reference_node['refname'] = normalize_name(subref_text)
980 self.document.note_refname(reference_node)
981 reference_node += subref_node
982 inlines = [reference_node]
983 return before, inlines, remaining, sysmessages
984
985 def footnote_reference(self, match, lineno):
986 """
987 Handles `nodes.footnote_reference` and `nodes.citation_reference`
988 elements.
989 """
990 label = match.group('footnotelabel')
991 refname = normalize_name(label)
992 string = match.string
993 before = string[:match.start('whole')]
994 remaining = string[match.end('whole'):]
995 if match.group('citationlabel'):
996 refnode = nodes.citation_reference('[%s]_' % label,
997 refname=refname)
998 refnode += nodes.Text(label)
999 self.document.note_citation_ref(refnode)
1000 else:
1001 refnode = nodes.footnote_reference('[%s]_' % label)
1002 if refname[0] == '#':
1003 refname = refname[1:]
1004 refnode['auto'] = 1
1005 self.document.note_autofootnote_ref(refnode)
1006 elif refname == '*':
1007 refname = ''
1008 refnode['auto'] = '*'
                self.document.note_symbol_footnote_ref(refnode)
1011 else:
1012 refnode += nodes.Text(label)
1013 if refname:
1014 refnode['refname'] = refname
1015 self.document.note_footnote_ref(refnode)
1016 if utils.get_trim_footnote_ref_space(self.document.settings):
1017 before = before.rstrip()
1018 return before, [refnode], remaining, []
1019
1020 def reference(self, match, lineno, anonymous=False):
1021 referencename = match.group('refname')
1022 refname = normalize_name(referencename)
1023 referencenode = nodes.reference(
1024 referencename + match.group('refend'), referencename,
1025 name=whitespace_normalize_name(referencename))
1026 referencenode[0].rawsource = referencename
1027 if anonymous:
1028 referencenode['anonymous'] = True
1029 else:
1030 referencenode['refname'] = refname
1031 self.document.note_refname(referencenode)
1032 string = match.string
1033 matchstart = match.start('whole')
1034 matchend = match.end('whole')
1035 return string[:matchstart], [referencenode], string[matchend:], []
1036
1037 def anonymous_reference(self, match, lineno):
1038 return self.reference(match, lineno, anonymous=True)
1039
1040 def standalone_uri(self, match, lineno):
1041 if (not match.group('scheme')
1042 or match.group('scheme').lower() in urischemes.schemes):
1043 if match.group('email'):
1044 addscheme = 'mailto:'
1045 else:
1046 addscheme = ''
1047 text = match.group('whole')
1048 refuri = addscheme + unescape(text)
1049 reference = nodes.reference(unescape(text, True), text,
1050 refuri=refuri)
1051 return [reference]
1052 else: # not a valid scheme
1053 raise MarkupMismatch
1054
1055 def pep_reference(self, match, lineno):
1056 text = match.group(0)
1057 if text.startswith('pep-'):
1058 pepnum = int(unescape(match.group('pepnum1')))
1059 elif text.startswith('PEP'):
1060 pepnum = int(unescape(match.group('pepnum2')))
1061 else:
1062 raise MarkupMismatch
1063 ref = (self.document.settings.pep_base_url
1064 + self.document.settings.pep_file_url_template % pepnum)
1065 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1066
1067 rfc_url = 'rfc%d.html'
1068
1069 def rfc_reference(self, match, lineno):
1070 text = match.group(0)
1071 if text.startswith('RFC'):
1072 rfcnum = int(unescape(match.group('rfcnum')))
1073 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
1074 else:
1075 raise MarkupMismatch
1076 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1077
1078 def implicit_inline(self, text, lineno):
1079 """
1080 Check each of the patterns in `self.implicit_dispatch` for a match,
1081 and dispatch to the stored method for the pattern. Recursively check
1082 the text before and after the match. Return a list of `nodes.Text`
1083 and inline element nodes.
1084 """
1085 if not text:
1086 return []
1087 for pattern, method in self.implicit_dispatch:
1088 match = pattern.search(text)
1089 if match:
1090 try:
1091 # Must recurse on strings before *and* after the match;
1092 # there may be multiple patterns.
1093 return (self.implicit_inline(text[:match.start()], lineno)
1094 + method(match, lineno)
1095 + self.implicit_inline(text[match.end():], lineno))
1096 except MarkupMismatch:
1097 pass
1098 return [nodes.Text(text)]
1099
1100 dispatch = {'*': emphasis,
1101 '**': strong,
1102 '`': interpreted_or_phrase_ref,
1103 '``': literal,
1104 '_`': inline_internal_target,
1105 ']_': footnote_reference,
1106 '|': substitution_reference,
1107 '_': reference,
1108 '__': anonymous_reference}
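
    # The keys above are the start-strings ('_', '__' and ']_' are
    # end-strings) captured by `self.patterns.initial`; `parse()` uses this
    # mapping to select the handler method for each match.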
1109
1110
1111def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1112 return ord(s) - _zero
1113
1114
1115def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1116 return ord(s) - _zero
1117
1118
1119class Body(RSTState):
1120
1121 """
1122 Generic classifier of the first line of a block.
1123 """
1124
1125 double_width_pad_char = tableparser.TableParser.double_width_pad_char
1126 """Padding character for East Asian double-width text."""
1127
1128 enum = Struct()
1129 """Enumerated list parsing information."""
1130
1131 enum.formatinfo = {
1132 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1133 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1134 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1135 enum.formats = enum.formatinfo.keys()
1136 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1137 'lowerroman', 'upperroman'] # ORDERED!
1138 enum.sequencepats = {'arabic': '[0-9]+',
1139 'loweralpha': '[a-z]',
1140 'upperalpha': '[A-Z]',
1141 'lowerroman': '[ivxlcdm]+',
1142 'upperroman': '[IVXLCDM]+'}
1143 enum.converters = {'arabic': int,
1144 'loweralpha': _loweralpha_to_int,
1145 'upperalpha': _upperalpha_to_int,
1146 'lowerroman': RomanNumeral.from_string,
1147 'upperroman': RomanNumeral.from_string}
1148
1149 enum.sequenceregexps = {}
1150 for sequence in enum.sequences:
1151 enum.sequenceregexps[sequence] = re.compile(
1152 enum.sequencepats[sequence] + '$')
1153
1154 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1155 """Matches the top (& bottom) of a full table)."""
1156
1157 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1158 """Matches the top of a simple table."""
1159
1160 simple_table_border_pat = re.compile('=+[ =]*$')
1161 """Matches the bottom & header bottom of a simple table."""
1162
1163 pats = {}
1164 """Fragments of patterns used by transitions."""
1165
1166 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1167 pats['alpha'] = '[a-zA-Z]'
1168 pats['alphanum'] = '[a-zA-Z0-9]'
1169 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1170 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1171 '|%(upperroman)s|#)' % enum.sequencepats)
1172 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1173 # @@@ Loosen up the pattern? Allow Unicode?
1174 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1175 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1176 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1177 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1178
1179 for format in enum.formats:
1180 pats[format] = '(?P<%s>%s%s%s)' % (
1181 format, re.escape(enum.formatinfo[format].prefix),
1182 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1183
1184 patterns = {
1185 'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
1186 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1187 'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
1188 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1189 'doctest': r'>>>( +|$)',
1190 'line_block': r'\|( +|$)',
1191 'grid_table_top': grid_table_top_pat,
1192 'simple_table_top': simple_table_top_pat,
1193 'explicit_markup': r'\.\.( +|$)',
1194 'anonymous': r'__( +|$)',
1195 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1196 'text': r''}
1197 initial_transitions = (
1198 'bullet',
1199 'enumerator',
1200 'field_marker',
1201 'option_marker',
1202 'doctest',
1203 'line_block',
1204 'grid_table_top',
1205 'simple_table_top',
1206 'explicit_markup',
1207 'anonymous',
1208 'line',
1209 'text')
1210
1211 def indent(self, match, context, next_state):
1212 """Block quote."""
1213 (indented, indent, line_offset, blank_finish
1214 ) = self.state_machine.get_indented()
1215 elements = self.block_quote(indented, line_offset)
1216 self.parent += elements
1217 if not blank_finish:
1218 self.parent += self.unindent_warning('Block quote')
1219 return context, next_state, []
1220
1221 def block_quote(self, indented, line_offset):
1222 elements = []
1223 while indented:
1224 blockquote = nodes.block_quote(rawsource='\n'.join(indented))
1225 (blockquote.source, blockquote.line
1226 ) = self.state_machine.get_source_and_line(line_offset+1)
1227 (blockquote_lines,
1228 attribution_lines,
1229 attribution_offset,
1230 indented,
1231 new_line_offset) = self.split_attribution(indented, line_offset)
1232 self.nested_parse(blockquote_lines, line_offset, blockquote)
1233 elements.append(blockquote)
1234 if attribution_lines:
1235 attribution, messages = self.parse_attribution(
1236 attribution_lines, line_offset+attribution_offset)
1237 blockquote += attribution
1238 elements += messages
1239 line_offset = new_line_offset
1240 while indented and not indented[0]:
1241 indented = indented[1:]
1242 line_offset += 1
1243 return elements
1244
1245 # U+2014 is an em-dash:
1246 attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
1247
1248 def split_attribution(self, indented, line_offset):
1249 """
1250 Check for a block quote attribution and split it off:
1251
1252 * First line after a blank line must begin with a dash ("--", "---",
1253 em-dash; matches `self.attribution_pattern`).
1254 * Every line after that must have consistent indentation.
1255 * Attributions must be preceded by block quote content.
1256
1257 Return a tuple of: (block quote content lines, attribution lines,
1258 attribution offset, remaining indented lines, remaining lines offset).
1259 """
1260 blank = None
1261 nonblank_seen = False
1262 for i in range(len(indented)):
1263 line = indented[i].rstrip()
1264 if line:
1265 if nonblank_seen and blank == i - 1: # last line blank
1266 match = self.attribution_pattern.match(line)
1267 if match:
1268 attribution_end, indent = self.check_attribution(
1269 indented, i)
1270 if attribution_end:
1271 a_lines = indented[i:attribution_end]
1272 a_lines.trim_left(match.end(), end=1)
1273 a_lines.trim_left(indent, start=1)
1274 return (indented[:i], a_lines,
1275 i, indented[attribution_end:],
1276 line_offset + attribution_end)
1277 nonblank_seen = True
1278 else:
1279 blank = i
1280 else:
1281 return indented, None, None, None, None
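
    # For example (a sketch, not executed): given the indented block
    #
    #     no man is an island
    #
    #     -- John Donne
    #
    # the first line is returned as block quote content and the trailing
    # "-- John Donne" line is split off as the attribution.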
1282
1283 def check_attribution(self, indented, attribution_start):
1284 """
1285 Check attribution shape.
1286 Return the index past the end of the attribution, and the indent.
1287 """
1288 indent = None
1289 i = attribution_start + 1
1290 for i in range(attribution_start + 1, len(indented)):
1291 line = indented[i].rstrip()
1292 if not line:
1293 break
1294 if indent is None:
1295 indent = len(line) - len(line.lstrip())
1296 elif len(line) - len(line.lstrip()) != indent:
1297 return None, None # bad shape; not an attribution
1298 else:
1299 # return index of line after last attribution line:
1300 i += 1
1301 return i, (indent or 0)
1302
1303 def parse_attribution(self, indented, line_offset):
1304 text = '\n'.join(indented).rstrip()
1305 lineno = 1 + line_offset # line_offset is zero-based
1306 textnodes, messages = self.inline_text(text, lineno)
1307 node = nodes.attribution(text, '', *textnodes)
1308 node.source, node.line = self.state_machine.get_source_and_line(lineno)
1309 return node, messages
1310
1311 def bullet(self, match, context, next_state):
1312 """Bullet list item."""
1313 ul = nodes.bullet_list()
1314 ul.source, ul.line = self.state_machine.get_source_and_line()
1315 self.parent += ul
1316 ul['bullet'] = match.string[0]
1317 i, blank_finish = self.list_item(match.end())
1318 ul += i
1319 offset = self.state_machine.line_offset + 1 # next line
1320 new_line_offset, blank_finish = self.nested_list_parse(
1321 self.state_machine.input_lines[offset:],
1322 input_offset=self.state_machine.abs_line_offset() + 1,
1323 node=ul, initial_state='BulletList',
1324 blank_finish=blank_finish)
1325 self.goto_line(new_line_offset)
1326 if not blank_finish:
1327 self.parent += self.unindent_warning('Bullet list')
1328 return [], next_state, []
1329
1330 def list_item(self, indent):
1331 src, srcline = self.state_machine.get_source_and_line()
1332 if self.state_machine.line[indent:]:
1333 indented, line_offset, blank_finish = (
1334 self.state_machine.get_known_indented(indent))
1335 else:
1336 indented, indent, line_offset, blank_finish = (
1337 self.state_machine.get_first_known_indented(indent))
1338 listitem = nodes.list_item('\n'.join(indented))
1339 listitem.source, listitem.line = src, srcline
1340 if indented:
1341 self.nested_parse(indented, input_offset=line_offset,
1342 node=listitem)
1343 return listitem, blank_finish
1344
1345 def enumerator(self, match, context, next_state):
1346 """Enumerated List Item"""
1347 format, sequence, text, ordinal = self.parse_enumerator(match)
1348 if not self.is_enumerated_list_item(ordinal, sequence, format):
1349 raise statemachine.TransitionCorrection('text')
1350 enumlist = nodes.enumerated_list()
1351 (enumlist.source,
1352 enumlist.line) = self.state_machine.get_source_and_line()
1353 self.parent += enumlist
1354 if sequence == '#':
1355 enumlist['enumtype'] = 'arabic'
1356 else:
1357 enumlist['enumtype'] = sequence
1358 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1359 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1360 if ordinal != 1:
1361 enumlist['start'] = ordinal
1362 msg = self.reporter.info(
1363 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1364 % (text, ordinal), base_node=enumlist)
1365 self.parent += msg
1366 listitem, blank_finish = self.list_item(match.end())
1367 enumlist += listitem
1368 offset = self.state_machine.line_offset + 1 # next line
1369 newline_offset, blank_finish = self.nested_list_parse(
1370 self.state_machine.input_lines[offset:],
1371 input_offset=self.state_machine.abs_line_offset() + 1,
1372 node=enumlist, initial_state='EnumeratedList',
1373 blank_finish=blank_finish,
1374 extra_settings={'lastordinal': ordinal,
1375 'format': format,
1376 'auto': sequence == '#'})
1377 self.goto_line(newline_offset)
1378 if not blank_finish:
1379 self.parent += self.unindent_warning('Enumerated list')
1380 return [], next_state, []
1381
1382 def parse_enumerator(self, match, expected_sequence=None):
1383 """
1384 Analyze an enumerator and return the results.
1385
1386 :Return:
1387 - the enumerator format ('period', 'parens', or 'rparen'),
1388 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1389 - the text of the enumerator, stripped of formatting, and
1390 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1391 ``None`` is returned for invalid enumerator text).
1392
1393 The enumerator format has already been determined by the regular
1394 expression match. If `expected_sequence` is given, that sequence is
1395 tried first. If not, we check for Roman numeral 1. This way,
1396 single-character Roman numerals (which are also alphabetical) can be
1397 matched. If no sequence has been matched, all sequences are checked in
1398 order.
1399 """
1400 groupdict = match.groupdict()
1401 sequence = ''
1402 for format in self.enum.formats:
1403 if groupdict[format]: # was this the format matched?
1404 break # yes; keep `format`
1405 else: # shouldn't happen
1406 raise ParserError('enumerator format not matched')
1407 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1408 : self.enum.formatinfo[format].end]
1409 if text == '#':
1410 sequence = '#'
1411 elif expected_sequence:
1412 try:
1413 if self.enum.sequenceregexps[expected_sequence].match(text):
1414 sequence = expected_sequence
1415 except KeyError: # shouldn't happen
1416 raise ParserError('unknown enumerator sequence: %s'
1417 % sequence)
1418 elif text == 'i':
1419 sequence = 'lowerroman'
1420 elif text == 'I':
1421 sequence = 'upperroman'
1422 if not sequence:
1423 for sequence in self.enum.sequences:
1424 if self.enum.sequenceregexps[sequence].match(text):
1425 break
1426 else: # shouldn't happen
1427 raise ParserError('enumerator sequence not matched')
1428 if sequence == '#':
1429 ordinal = 1
1430 else:
1431 try:
1432 ordinal = int(self.enum.converters[sequence](text))
1433 except InvalidRomanNumeralError:
1434 ordinal = None
1435 return format, sequence, text, ordinal
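
    # Examples (a sketch, not executed):
    #   '3.'  -> ('period', 'arabic',     '3', 3)
    #   '(a)' -> ('parens', 'loweralpha', 'a', 1)
    #   '#)'  -> ('rparen', '#',          '#', 1)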
1436
1437 def is_enumerated_list_item(self, ordinal, sequence, format):
1438 """
1439 Check validity based on the ordinal value and the second line.
1440
1441 Return true if the ordinal is valid and the second line is blank,
1442 indented, or starts with the next enumerator or an auto-enumerator.
1443 """
1444 if ordinal is None:
1445 return None
1446 try:
1447 next_line = self.state_machine.next_line()
1448 except EOFError: # end of input lines
1449 self.state_machine.previous_line()
1450 return 1
1451 else:
1452 self.state_machine.previous_line()
1453 if not next_line[:1].strip(): # blank or indented
1454 return 1
1455 result = self.make_enumerator(ordinal + 1, sequence, format)
1456 if result:
1457 next_enumerator, auto_enumerator = result
1458 try:
1459 if next_line.startswith((next_enumerator, auto_enumerator)):
1460 return 1
1461 except TypeError:
1462 pass
1463 return None
1464
1465 def make_enumerator(self, ordinal, sequence, format):
1466 """
1467 Construct and return the next enumerated list item marker, and an
1468 auto-enumerator ("#" instead of the regular enumerator).
1469
1470 Return ``None`` for invalid (out of range) ordinals.
1471 """
1472 if sequence == '#':
1473 enumerator = '#'
1474 elif sequence == 'arabic':
1475 enumerator = str(ordinal)
1476 else:
1477 if sequence.endswith('alpha'):
1478 if ordinal > 26:
1479 return None
1480 enumerator = chr(ordinal + ord('a') - 1)
1481 elif sequence.endswith('roman'):
1482 try:
1483 enumerator = RomanNumeral(ordinal).to_uppercase()
1484 except TypeError:
1485 return None
1486 else: # shouldn't happen
1487 raise ParserError('unknown enumerator sequence: "%s"'
1488 % sequence)
1489 if sequence.startswith('lower'):
1490 enumerator = enumerator.lower()
1491 elif sequence.startswith('upper'):
1492 enumerator = enumerator.upper()
1493 else: # shouldn't happen
1494 raise ParserError('unknown enumerator sequence: "%s"'
1495 % sequence)
1496 formatinfo = self.enum.formatinfo[format]
1497 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1498 + ' ')
1499 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1500 return next_enumerator, auto_enumerator
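
    # Examples (a sketch, not executed):
    #   make_enumerator(3, 'loweralpha', 'parens')  -> ('(c) ', '(#) ')
    #   make_enumerator(4, 'upperroman', 'period')  -> ('IV. ', '#. ')
    #   make_enumerator(27, 'loweralpha', 'period') -> None  (out of range)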
1501
1502 def field_marker(self, match, context, next_state):
1503 """Field list item."""
1504 field_list = nodes.field_list()
1505 self.parent += field_list
1506 field, blank_finish = self.field(match)
1507 field_list += field
1508 offset = self.state_machine.line_offset + 1 # next line
1509 newline_offset, blank_finish = self.nested_list_parse(
1510 self.state_machine.input_lines[offset:],
1511 input_offset=self.state_machine.abs_line_offset() + 1,
1512 node=field_list, initial_state='FieldList',
1513 blank_finish=blank_finish)
1514 self.goto_line(newline_offset)
1515 if not blank_finish:
1516 self.parent += self.unindent_warning('Field list')
1517 return [], next_state, []
1518
1519 def field(self, match):
1520 name = self.parse_field_marker(match)
1521 src, srcline = self.state_machine.get_source_and_line()
1522 lineno = self.state_machine.abs_line_number()
1523 (indented, indent, line_offset, blank_finish
1524 ) = self.state_machine.get_first_known_indented(match.end())
1525 field_node = nodes.field()
1526 field_node.source = src
1527 field_node.line = srcline
1528 name_nodes, name_messages = self.inline_text(name, lineno)
1529 field_node += nodes.field_name(name, '', *name_nodes)
1530 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1531 field_node += field_body
1532 if indented:
1533 self.parse_field_body(indented, line_offset, field_body)
1534 return field_node, blank_finish
1535
1536 def parse_field_marker(self, match):
1537 """Extract & return field name from a field marker match."""
1538 field = match.group()[1:] # strip off leading ':'
1539 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1540 return field
1541
1542 def parse_field_body(self, indented, offset, node) -> None:
1543 self.nested_parse(indented, input_offset=offset, node=node)
1544
1545 def option_marker(self, match, context, next_state):
1546 """Option list item."""
1547 optionlist = nodes.option_list()
1548 (optionlist.source, optionlist.line
1549 ) = self.state_machine.get_source_and_line()
1550 try:
1551 listitem, blank_finish = self.option_list_item(match)
1552 except MarkupError as error:
1553 # This shouldn't happen; pattern won't match.
1554 msg = self.reporter.error('Invalid option list marker: %s'
1555 % error)
1556 self.parent += msg
1557 (indented, indent, line_offset, blank_finish
1558 ) = self.state_machine.get_first_known_indented(match.end())
1559 elements = self.block_quote(indented, line_offset)
1560 self.parent += elements
1561 if not blank_finish:
1562 self.parent += self.unindent_warning('Option list')
1563 return [], next_state, []
1564 self.parent += optionlist
1565 optionlist += listitem
1566 offset = self.state_machine.line_offset + 1 # next line
1567 newline_offset, blank_finish = self.nested_list_parse(
1568 self.state_machine.input_lines[offset:],
1569 input_offset=self.state_machine.abs_line_offset() + 1,
1570 node=optionlist, initial_state='OptionList',
1571 blank_finish=blank_finish)
1572 self.goto_line(newline_offset)
1573 if not blank_finish:
1574 self.parent += self.unindent_warning('Option list')
1575 return [], next_state, []
1576
1577 def option_list_item(self, match):
1578 offset = self.state_machine.abs_line_offset()
1579 options = self.parse_option_marker(match)
1580 (indented, indent, line_offset, blank_finish
1581 ) = self.state_machine.get_first_known_indented(match.end())
1582 if not indented: # not an option list item
1583 self.goto_line(offset)
1584 raise statemachine.TransitionCorrection('text')
1585 option_group = nodes.option_group('', *options)
1586 description = nodes.description('\n'.join(indented))
1587 option_list_item = nodes.option_list_item('', option_group,
1588 description)
1589 if indented:
1590 self.nested_parse(indented, input_offset=line_offset,
1591 node=description)
1592 return option_list_item, blank_finish
1593
1594 def parse_option_marker(self, match):
1595 """
1596        Return a list of `nodes.option` elements (each an option string
1597        plus an optional argument), parsed from an option marker match.
1598
1599 :Exception: `MarkupError` for invalid option markers.
1600 """
1601 optlist = []
1602 # split at ", ", except inside < > (complex arguments)
1603 optionstrings = re.split(r', (?![^<]*>)', match.group().rstrip())
1604 for optionstring in optionstrings:
1605 tokens = optionstring.split()
1606 delimiter = ' '
1607 firstopt = tokens[0].split('=', 1)
1608 if len(firstopt) > 1:
1609 # "--opt=value" form
1610 tokens[:1] = firstopt
1611 delimiter = '='
1612 elif (len(tokens[0]) > 2
1613 and ((tokens[0].startswith('-')
1614 and not tokens[0].startswith('--'))
1615 or tokens[0].startswith('+'))):
1616 # "-ovalue" form
1617 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1618 delimiter = ''
1619 if len(tokens) > 1 and (tokens[1].startswith('<')
1620 and tokens[-1].endswith('>')):
1621 # "-o <value1 value2>" form; join all values into one token
1622 tokens[1:] = [' '.join(tokens[1:])]
1623 if 0 < len(tokens) <= 2:
1624 option = nodes.option(optionstring)
1625 option += nodes.option_string(tokens[0], tokens[0])
1626 if len(tokens) > 1:
1627 option += nodes.option_argument(tokens[1], tokens[1],
1628 delimiter=delimiter)
1629 optlist.append(option)
1630 else:
1631 raise MarkupError(
1632 'wrong number of option tokens (=%s), should be 1 or 2: '
1633 '"%s"' % (len(tokens), optionstring))
1634 return optlist
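    # Option-marker forms recognized above (illustrative, not in the
    # original source):
    #
    #     -a            short option
    #     -b file       short option with an argument
    #     --long        long option
    #     --input=FILE  long option with "=" delimiter
    #     -x, -y, -z    several synonymous options, separated by ", "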
1635
1636 def doctest(self, match, context, next_state):
1637 line = self.document.current_line
1638 data = '\n'.join(self.state_machine.get_text_block())
1639 # TODO: Parse with `directives.body.CodeBlock` with
1640 # argument 'pycon' (Python Console) in Docutils 1.0.
1641 n = nodes.doctest_block(data, data)
1642 n.line = line
1643 self.parent += n
1644 return [], next_state, []
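    # A doctest block is an unindented, blank-line-delimited block starting
    # with ">>> " (illustrative example, not in the original source):
    #
    #     >>> print('Hello, world!')
    #     Hello, world!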
1645
1646 def line_block(self, match, context, next_state):
1647 """First line of a line block."""
1648 block = nodes.line_block()
1649 self.parent += block
1650 lineno = self.state_machine.abs_line_number()
1651 (block.source,
1652 block.line) = self.state_machine.get_source_and_line(lineno)
1653 line, messages, blank_finish = self.line_block_line(match, lineno)
1654 block += line
1655 self.parent += messages
1656 if not blank_finish:
1657 offset = self.state_machine.line_offset + 1 # next line
1658 new_line_offset, blank_finish = self.nested_list_parse(
1659 self.state_machine.input_lines[offset:],
1660 input_offset=self.state_machine.abs_line_offset() + 1,
1661 node=block, initial_state='LineBlock',
1662 blank_finish=False)
1663 self.goto_line(new_line_offset)
1664 if not blank_finish:
1665 self.parent += self.reporter.warning(
1666 'Line block ends without a blank line.',
1667 line=lineno+1)
1668 if len(block):
1669 if block[0].indent is None:
1670 block[0].indent = 0
1671 self.nest_line_block_lines(block)
1672 return [], next_state, []
1673
1674 def line_block_line(self, match, lineno):
1675 """Return one line element of a line_block."""
1676 (indented, indent, line_offset, blank_finish
1677 ) = self.state_machine.get_first_known_indented(match.end(),
1678 until_blank=True)
1679 text = '\n'.join(indented)
1680 text_nodes, messages = self.inline_text(text, lineno)
1681 line = nodes.line(text, '', *text_nodes)
1682 (line.source,
1683 line.line) = self.state_machine.get_source_and_line(lineno)
1684 if match.string.rstrip() != '|': # not empty
1685 line.indent = len(match.group(1)) - 1
1686 return line, messages, blank_finish
1687
1688 def nest_line_block_lines(self, block) -> None:
1689 for index in range(1, len(block)):
1690 if block[index].indent is None:
1691 block[index].indent = block[index - 1].indent
1692 self.nest_line_block_segment(block)
1693
1694 def nest_line_block_segment(self, block) -> None:
1695 indents = [item.indent for item in block]
1696 least = min(indents)
1697 new_items = []
1698 new_block = nodes.line_block()
1699 for item in block:
1700 if item.indent > least:
1701 new_block.append(item)
1702 else:
1703 if len(new_block):
1704 self.nest_line_block_segment(new_block)
1705 new_items.append(new_block)
1706 new_block = nodes.line_block()
1707 new_items.append(item)
1708 if len(new_block):
1709 self.nest_line_block_segment(new_block)
1710 new_items.append(new_block)
1711 block[:] = new_items
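    # Line-block source and the effect of indentation (illustrative example,
    # not in the original source); deeper-indented lines become nested
    # line_block elements:
    #
    #     | Lend us a couple of bob till Thursday.
    #     |   I'm absolutely skint.
    #     | But I'm expecting a postal order.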
1712
1713 def grid_table_top(self, match, context, next_state):
1714 """Top border of a full table."""
1715 return self.table_top(match, context, next_state,
1716 self.isolate_grid_table,
1717 tableparser.GridTableParser)
1718
1719 def simple_table_top(self, match, context, next_state):
1720 """Top border of a simple table."""
1721 return self.table_top(match, context, next_state,
1722 self.isolate_simple_table,
1723 tableparser.SimpleTableParser)
1724
1725 def table_top(self, match, context, next_state,
1726 isolate_function, parser_class):
1727 """Top border of a generic table."""
1728 nodelist, blank_finish = self.table(isolate_function, parser_class)
1729 self.parent += nodelist
1730 if not blank_finish:
1731 msg = self.reporter.warning(
1732 'Blank line required after table.',
1733 line=self.state_machine.abs_line_number()+1)
1734 self.parent += msg
1735 return [], next_state, []
1736
1737 def table(self, isolate_function, parser_class):
1738 """Parse a table."""
1739 block, messages, blank_finish = isolate_function()
1740 if block:
1741 try:
1742 parser = parser_class()
1743 tabledata = parser.parse(block)
1744 tableline = (self.state_machine.abs_line_number() - len(block)
1745 + 1)
1746 table = self.build_table(tabledata, tableline)
1747 nodelist = [table] + messages
1748 except tableparser.TableMarkupError as err:
1749 nodelist = self.malformed_table(block, ' '.join(err.args),
1750 offset=err.offset) + messages
1751 else:
1752 nodelist = messages
1753 return nodelist, blank_finish
1754
1755 def isolate_grid_table(self):
1756 messages = []
1757 blank_finish = True
1758 try:
1759 block = self.state_machine.get_text_block(flush_left=True)
1760 except statemachine.UnexpectedIndentationError as err:
1761 block, src, srcline = err.args
1762 messages.append(self.reporter.error('Unexpected indentation.',
1763 source=src, line=srcline))
1764 blank_finish = False
1765 block.disconnect()
1766 # for East Asian chars:
1767 block.pad_double_width(self.double_width_pad_char)
1768 width = len(block[0].strip())
1769 for i in range(len(block)):
1770 block[i] = block[i].strip()
1771 if block[i][0] not in '+|': # check left edge
1772 blank_finish = False
1773 self.state_machine.previous_line(len(block) - i)
1774 del block[i:]
1775 break
1776 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1777 # from second-last to third line of table:
1778 for i in range(len(block) - 2, 1, -1):
1779 if self.grid_table_top_pat.match(block[i]):
1780 self.state_machine.previous_line(len(block) - i + 1)
1781 del block[i+1:]
1782 blank_finish = False
1783 break
1784 else:
1785 detail = 'Bottom border missing or corrupt.'
1786 messages.extend(self.malformed_table(block, detail, i))
1787 return [], messages, blank_finish
1788 for i in range(len(block)): # check right edge
1789 if len(block[i]) != width or block[i][-1] not in '+|':
1790 detail = 'Right border not aligned or missing.'
1791 messages.extend(self.malformed_table(block, detail, i))
1792 return [], messages, blank_finish
1793 return block, messages, blank_finish
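    # A minimal grid table as isolated above (illustrative, not in the
    # original source):
    #
    #     +------------+------------+
    #     | Header 1   | Header 2   |
    #     +============+============+
    #     | body row 1 | body row 2 |
    #     +------------+------------+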
1794
1795 def isolate_simple_table(self):
1796 start = self.state_machine.line_offset
1797 lines = self.state_machine.input_lines
1798 limit = len(lines) - 1
1799 toplen = len(lines[start].strip())
1800 pattern_match = self.simple_table_border_pat.match
1801 found = 0
1802 found_at = None
1803 i = start + 1
1804 while i <= limit:
1805 line = lines[i]
1806 match = pattern_match(line)
1807 if match:
1808 if len(line.strip()) != toplen:
1809 self.state_machine.next_line(i - start)
1810 messages = self.malformed_table(
1811 lines[start:i+1], 'Bottom border or header rule does '
1812 'not match top border.', i-start)
1813 return [], messages, i == limit or not lines[i+1].strip()
1814 found += 1
1815 found_at = i
1816 if found == 2 or i == limit or not lines[i+1].strip():
1817 end = i
1818 break
1819 i += 1
1820 else: # reached end of input_lines
1821 details = 'No bottom table border found'
1822 if found:
1823 details += ' or no blank line after table bottom'
1824 self.state_machine.next_line(found_at - start)
1825 block = lines[start:found_at+1]
1826 else:
1827 self.state_machine.next_line(i - start - 1)
1828 block = lines[start:]
1829 messages = self.malformed_table(block, details + '.')
1830 return [], messages, not found
1831 self.state_machine.next_line(end - start)
1832 block = lines[start:end+1]
1833 # for East Asian chars:
1834 block.pad_double_width(self.double_width_pad_char)
1835 return block, [], end == limit or not lines[end+1].strip()
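    # A minimal simple table as isolated above (illustrative, not in the
    # original source); the '=' rules mark the borders and the header:
    #
    #     =====  =====
    #     col 1  col 2
    #     =====  =====
    #     1      2
    #     =====  =====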
1836
1837 def malformed_table(self, block, detail='', offset=0):
1838 block.replace(self.double_width_pad_char, '')
1839 data = '\n'.join(block)
1840 message = 'Malformed table.'
1841 startline = self.state_machine.abs_line_number() - len(block) + 1
1842 if detail:
1843 message += '\n' + detail
1844 error = self.reporter.error(message, nodes.literal_block(data, data),
1845 line=startline+offset)
1846 return [error]
1847
1848 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
1849 colwidths, headrows, bodyrows = tabledata
1850 table = nodes.table()
1851 if widths == 'auto':
1852 table['classes'] += ['colwidths-auto']
1853 elif widths: # "grid" or list of integers
1854 table['classes'] += ['colwidths-given']
1855 tgroup = nodes.tgroup(cols=len(colwidths))
1856 table += tgroup
1857 for colwidth in colwidths:
1858 colspec = nodes.colspec(colwidth=colwidth)
1859 if stub_columns:
1860 colspec.attributes['stub'] = True
1861 stub_columns -= 1
1862 tgroup += colspec
1863 if headrows:
1864 thead = nodes.thead()
1865 tgroup += thead
1866 for row in headrows:
1867 thead += self.build_table_row(row, tableline)
1868 tbody = nodes.tbody()
1869 tgroup += tbody
1870 for row in bodyrows:
1871 tbody += self.build_table_row(row, tableline)
1872 return table
1873
1874 def build_table_row(self, rowdata, tableline):
1875 row = nodes.row()
1876 for cell in rowdata:
1877 if cell is None:
1878 continue
1879 morerows, morecols, offset, cellblock = cell
1880 attributes = {}
1881 if morerows:
1882 attributes['morerows'] = morerows
1883 if morecols:
1884 attributes['morecols'] = morecols
1885 entry = nodes.entry(**attributes)
1886 row += entry
1887 if ''.join(cellblock):
1888 self.nested_parse(cellblock, input_offset=tableline+offset,
1889 node=entry)
1890 return row
1891
1892 explicit = Struct()
1893 """Patterns and constants used for explicit markup recognition."""
1894
1895 explicit.patterns = Struct(
1896 target=re.compile(r"""
1897 (
1898 _ # anonymous target
1899 | # *OR*
1900 (?!_) # no underscore at the beginning
1901 (?P<quote>`?) # optional open quote
1902 (?![ `]) # first char. not space or
1903 # backquote
1904 (?P<name> # reference name
1905 .+?
1906 )
1907 %(non_whitespace_escape_before)s
1908 (?P=quote) # close quote if open quote used
1909 )
1910 (?<!(?<!\x00):) # no unescaped colon at end
1911 %(non_whitespace_escape_before)s
1912 [ ]? # optional space
1913 : # end of reference name
1914 ([ ]+|$) # followed by whitespace
1915 """ % vars(Inliner), re.VERBOSE),
1916 reference=re.compile(r"""
1917 (
1918 (?P<simple>%(simplename)s)_
1919 | # *OR*
1920 ` # open backquote
1921 (?![ ]) # not space
1922 (?P<phrase>.+?) # hyperlink phrase
1923 %(non_whitespace_escape_before)s
1924 `_ # close backquote,
1925 # reference mark
1926 )
1927 $ # end of string
1928 """ % vars(Inliner), re.VERBOSE),
1929 substitution=re.compile(r"""
1930 (
1931 (?![ ]) # first char. not space
1932 (?P<name>.+?) # substitution text
1933 %(non_whitespace_escape_before)s
1934 \| # close delimiter
1935 )
1936 ([ ]+|$) # followed by whitespace
1937 """ % vars(Inliner),
1938 re.VERBOSE),)
1939
1940 def footnote(self, match):
1941 src, srcline = self.state_machine.get_source_and_line()
1942 (indented, indent, offset, blank_finish
1943 ) = self.state_machine.get_first_known_indented(match.end())
1944 label = match.group(1)
1945 name = normalize_name(label)
1946 footnote = nodes.footnote('\n'.join(indented))
1947 footnote.source = src
1948 footnote.line = srcline
1949 if name[0] == '#': # auto-numbered
1950 name = name[1:] # autonumber label
1951 footnote['auto'] = 1
1952 if name:
1953 footnote['names'].append(name)
1954 self.document.note_autofootnote(footnote)
1955 elif name == '*': # auto-symbol
1956 name = ''
1957 footnote['auto'] = '*'
1958 self.document.note_symbol_footnote(footnote)
1959 else: # manually numbered
1960 footnote += nodes.label('', label)
1961 footnote['names'].append(name)
1962 self.document.note_footnote(footnote)
1963 if name:
1964 self.document.note_explicit_target(footnote, footnote)
1965 else:
1966 self.document.set_id(footnote, footnote)
1967 if indented:
1968 self.nested_parse(indented, input_offset=offset, node=footnote)
1969 else:
1970 footnote += self.reporter.warning('Footnote content expected.')
1971 return [footnote], blank_finish
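    # Footnote label forms handled above (illustrative, not in the original
    # source):
    #
    #     .. [1] Manually numbered footnote.
    #     .. [#] Auto-numbered footnote.
    #     .. [#label] Auto-numbered footnote with a label.
    #     .. [*] Auto-symbol footnote.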
1972
1973 def citation(self, match):
1974 src, srcline = self.state_machine.get_source_and_line()
1975 (indented, indent, offset, blank_finish
1976 ) = self.state_machine.get_first_known_indented(match.end())
1977 label = match.group(1)
1978 name = normalize_name(label)
1979 citation = nodes.citation('\n'.join(indented))
1980 citation.source = src
1981 citation.line = srcline
1982 citation += nodes.label('', label)
1983 citation['names'].append(name)
1984 self.document.note_citation(citation)
1985 self.document.note_explicit_target(citation, citation)
1986 if indented:
1987 self.nested_parse(indented, input_offset=offset, node=citation)
1988 else:
1989 citation += self.reporter.warning('Citation content expected.')
1990 return [citation], blank_finish
1991
1992 def hyperlink_target(self, match):
1993 pattern = self.explicit.patterns.target
1994 lineno = self.state_machine.abs_line_number()
1995 (block, indent, offset, blank_finish
1996 ) = self.state_machine.get_first_known_indented(
1997 match.end(), until_blank=True, strip_indent=False)
1998 blocktext = match.string[:match.end()] + '\n'.join(block)
1999 block = [escape2null(line) for line in block]
2000 escaped = block[0]
2001 blockindex = 0
2002 while True:
2003 targetmatch = pattern.match(escaped)
2004 if targetmatch:
2005 break
2006 blockindex += 1
2007 try:
2008 escaped += block[blockindex]
2009 except IndexError:
2010 raise MarkupError('malformed hyperlink target.')
2011 del block[:blockindex]
2012 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
2013 target = self.make_target(block, blocktext, lineno,
2014 targetmatch.group('name'))
2015 return [target], blank_finish
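    # Hyperlink-target forms handled above (illustrative, not in the
    # original source):
    #
    #     .. _Docutils: https://docutils.sourceforge.io/
    #     .. _internal target:
    #     .. _indirect target: Docutils_
    #     .. __: https://example.org/
    #
    # The last form is an anonymous target; the short form
    # "__ https://example.org/" is handled by the 'anonymous' transition.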
2016
2017 def make_target(self, block, block_text, lineno, target_name):
2018 target_type, data = self.parse_target(block, block_text, lineno)
2019 if target_type == 'refname':
2020 target = nodes.target(block_text, '', refname=normalize_name(data))
2021 target.indirect_reference_name = data
2022 self.add_target(target_name, '', target, lineno)
2023 self.document.note_indirect_target(target)
2024 return target
2025 elif target_type == 'refuri':
2026 target = nodes.target(block_text, '')
2027 self.add_target(target_name, data, target, lineno)
2028 return target
2029 else:
2030 return data
2031
2032 def parse_target(self, block, block_text, lineno):
2033 """
2034 Determine the type of reference of a target.
2035
2036 :Return: A 2-tuple, one of:
2037
2038 - 'refname' and the indirect reference name
2039 - 'refuri' and the URI
2040 - 'malformed' and a system_message node
2041 """
2042 if block and block[-1].strip()[-1:] == '_': # possible indirect target
2043 reference = ' '.join(line.strip() for line in block)
2044 refname = self.is_reference(reference)
2045 if refname:
2046 return 'refname', refname
2047 ref_parts = split_escaped_whitespace(' '.join(block))
2048 reference = ' '.join(''.join(unescape(part).split())
2049 for part in ref_parts)
2050 return 'refuri', reference
2051
2052 def is_reference(self, reference):
2053 match = self.explicit.patterns.reference.match(
2054 whitespace_normalize_name(reference))
2055 if not match:
2056 return None
2057 return unescape(match.group('simple') or match.group('phrase'))
2058
2059 def add_target(self, targetname, refuri, target, lineno):
2060 target.line = lineno
2061 if targetname:
2062 name = normalize_name(unescape(targetname))
2063 target['names'].append(name)
2064 if refuri:
2065 uri = self.inliner.adjust_uri(refuri)
2066 if uri:
2067 target['refuri'] = uri
2068 else:
2069 raise ApplicationError('problem with URI: %r' % refuri)
2070 self.document.note_explicit_target(target, self.parent)
2071 else: # anonymous target
2072 if refuri:
2073 target['refuri'] = refuri
2074 target['anonymous'] = True
2075 self.document.note_anonymous_target(target)
2076
2077 def substitution_def(self, match):
2078 pattern = self.explicit.patterns.substitution
2079 src, srcline = self.state_machine.get_source_and_line()
2080 (block, indent, offset, blank_finish
2081 ) = self.state_machine.get_first_known_indented(match.end(),
2082 strip_indent=False)
2083 blocktext = (match.string[:match.end()] + '\n'.join(block))
2084 block.disconnect()
2085 escaped = escape2null(block[0].rstrip())
2086 blockindex = 0
2087 while True:
2088 subdefmatch = pattern.match(escaped)
2089 if subdefmatch:
2090 break
2091 blockindex += 1
2092 try:
2093 escaped = escaped + ' ' + escape2null(
2094 block[blockindex].strip())
2095 except IndexError:
2096 raise MarkupError('malformed substitution definition.')
2097 del block[:blockindex] # strip out the substitution marker
2098 start = subdefmatch.end()-len(escaped)-1
2099 block[0] = (block[0].strip() + ' ')[start:-1]
2100 if not block[0]:
2101 del block[0]
2102 offset += 1
2103 while block and not block[-1].strip():
2104 block.pop()
2105 subname = subdefmatch.group('name')
2106 substitution_node = nodes.substitution_definition(blocktext)
2107 substitution_node.source = src
2108 substitution_node.line = srcline
2109 if not block:
2110 msg = self.reporter.warning(
2111 'Substitution definition "%s" missing contents.' % subname,
2112 nodes.literal_block(blocktext, blocktext),
2113 source=src, line=srcline)
2114 return [msg], blank_finish
2115 block[0] = block[0].strip()
2116 substitution_node['names'].append(
2117 nodes.whitespace_normalize_name(subname))
2118 new_abs_offset, blank_finish = self.nested_list_parse(
2119 block, input_offset=offset, node=substitution_node,
2120 initial_state='SubstitutionDef', blank_finish=blank_finish)
2121 i = 0
2122 for node in substitution_node[:]:
2123 if not (isinstance(node, nodes.Inline)
2124 or isinstance(node, nodes.Text)):
2125 self.parent += substitution_node[i]
2126 del substitution_node[i]
2127 else:
2128 i += 1
2129 for node in substitution_node.findall(nodes.Element):
2130 if self.disallowed_inside_substitution_definitions(node):
2131 pformat = nodes.literal_block('', node.pformat().rstrip())
2132 msg = self.reporter.error(
2133 'Substitution definition contains illegal element <%s>:'
2134 % node.tagname,
2135 pformat, nodes.literal_block(blocktext, blocktext),
2136 source=src, line=srcline)
2137 return [msg], blank_finish
2138 if len(substitution_node) == 0:
2139 msg = self.reporter.warning(
2140 'Substitution definition "%s" empty or invalid.' % subname,
2141 nodes.literal_block(blocktext, blocktext),
2142 source=src, line=srcline)
2143 return [msg], blank_finish
2144 self.document.note_substitution_def(
2145 substitution_node, subname, self.parent)
2146 return [substitution_node], blank_finish
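    # Substitution-definition forms handled above (illustrative, not in the
    # original source):
    #
    #     .. |reST| replace:: reStructuredText
    #     .. |logo| image:: logo.png
    #        :alt: project logo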
2147
2148 def disallowed_inside_substitution_definitions(self, node) -> bool:
2149 if (node['ids']
2150 or isinstance(node, nodes.reference) and node.get('anonymous')
2151 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2152 return True
2153 else:
2154 return False
2155
2156 def directive(self, match, **option_presets):
2157 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2158 type_name = match.group(1)
2159 directive_class, messages = directives.directive(
2160 type_name, self.memo.language, self.document)
2161 self.parent += messages
2162 if directive_class:
2163 return self.run_directive(
2164 directive_class, match, type_name, option_presets)
2165 else:
2166 return self.unknown_directive(type_name)
2167
2168 def run_directive(self, directive, match, type_name, option_presets):
2169 """
2170 Parse a directive then run its directive function.
2171
2172 Parameters:
2173
2174 - `directive`: The class implementing the directive. Must be
2175 a subclass of `rst.Directive`.
2176
2177 - `match`: A regular expression match object which matched the first
2178 line of the directive.
2179
2180 - `type_name`: The directive name, as used in the source text.
2181
2182 - `option_presets`: A dictionary of preset options, defaults for the
2183 directive options. Currently, only an "alt" option is passed by
2184 substitution definitions (value: the substitution name), which may
2185 be used by an embedded image directive.
2186
2187 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2188 """
2189 if isinstance(directive, (FunctionType, MethodType)):
2190 from docutils.parsers.rst import convert_directive_function
2191 directive = convert_directive_function(directive)
2192 lineno = self.state_machine.abs_line_number()
2193 initial_line_offset = self.state_machine.line_offset
2194 (indented, indent, line_offset, blank_finish
2195 ) = self.state_machine.get_first_known_indented(match.end(),
2196 strip_top=0)
2197 block_text = '\n'.join(self.state_machine.input_lines[
2198 initial_line_offset : self.state_machine.line_offset + 1]) # noqa: E203,E501
2199 try:
2200 arguments, options, content, content_offset = (
2201 self.parse_directive_block(indented, line_offset,
2202 directive, option_presets))
2203 except MarkupError as detail:
2204 error = self.reporter.error(
2205 'Error in "%s" directive:\n%s.' % (type_name,
2206 ' '.join(detail.args)),
2207 nodes.literal_block(block_text, block_text), line=lineno)
2208 return [error], blank_finish
2209 directive_instance = directive(
2210 type_name, arguments, options, content, lineno,
2211 content_offset, block_text, self, self.state_machine)
2212 try:
2213 result = directive_instance.run()
2214 except docutils.parsers.rst.DirectiveError as error:
2215 msg_node = self.reporter.system_message(error.level, error.msg,
2216 line=lineno)
2217 msg_node += nodes.literal_block(block_text, block_text)
2218 result = [msg_node]
2219 assert isinstance(result, list), \
2220 'Directive "%s" must return a list of nodes.' % type_name
2221 for i in range(len(result)):
2222 assert isinstance(result[i], nodes.Node), \
2223 ('Directive "%s" returned non-Node object (index %s): %r'
2224 % (type_name, i, result[i]))
2225 return (result,
2226 blank_finish or self.state_machine.is_next_line_blank())
2227
2228 def parse_directive_block(self, indented, line_offset, directive,
2229 option_presets):
2230 option_spec = directive.option_spec
2231 has_content = directive.has_content
2232 if indented and not indented[0].strip():
2233 indented.trim_start()
2234 line_offset += 1
2235 while indented and not indented[-1].strip():
2236 indented.trim_end()
2237 if indented and (directive.required_arguments
2238 or directive.optional_arguments
2239 or option_spec):
2240 for i, line in enumerate(indented):
2241 if not line.strip():
2242 break
2243 else:
2244 i += 1
2245 arg_block = indented[:i]
2246 content = indented[i+1:]
2247 content_offset = line_offset + i + 1
2248 else:
2249 content = indented
2250 content_offset = line_offset
2251 arg_block = []
2252 if option_spec:
2253 options, arg_block = self.parse_directive_options(
2254 option_presets, option_spec, arg_block)
2255 else:
2256 options = {}
2257 if arg_block and not (directive.required_arguments
2258 or directive.optional_arguments):
2259 content = arg_block + indented[i:]
2260 content_offset = line_offset
2261 arg_block = []
2262 while content and not content[0].strip():
2263 content.trim_start()
2264 content_offset += 1
2265 if directive.required_arguments or directive.optional_arguments:
2266 arguments = self.parse_directive_arguments(
2267 directive, arg_block)
2268 else:
2269 arguments = []
2270 if content and not has_content:
2271 raise MarkupError('no content permitted')
2272 return arguments, options, content, content_offset
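    # Anatomy of a directive block as split above (illustrative, not in the
    # original source):
    #
    #     .. admonition:: And, by the way...    <- name and argument(s)
    #        :class: note                       <- option field list
    #
    #        Directive content starts after the first blank line.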
2273
2274 def parse_directive_options(self, option_presets, option_spec, arg_block):
2275 options = option_presets.copy()
2276 for i, line in enumerate(arg_block):
2277 if re.match(Body.patterns['field_marker'], line):
2278 opt_block = arg_block[i:]
2279 arg_block = arg_block[:i]
2280 break
2281 else:
2282 opt_block = []
2283 if opt_block:
2284 success, data = self.parse_extension_options(option_spec,
2285 opt_block)
2286 if success: # data is a dict of options
2287 options.update(data)
2288 else: # data is an error string
2289 raise MarkupError(data)
2290 return options, arg_block
2291
2292 def parse_directive_arguments(self, directive, arg_block):
2293 required = directive.required_arguments
2294 optional = directive.optional_arguments
2295 arg_text = '\n'.join(arg_block)
2296 arguments = arg_text.split()
2297 if len(arguments) < required:
2298 raise MarkupError('%s argument(s) required, %s supplied'
2299 % (required, len(arguments)))
2300 elif len(arguments) > required + optional:
2301 if directive.final_argument_whitespace:
2302 arguments = arg_text.split(None, required + optional - 1)
2303 else:
2304 raise MarkupError(
2305 'maximum %s argument(s) allowed, %s supplied'
2306 % (required + optional, len(arguments)))
2307 return arguments
2308
2309 def parse_extension_options(self, option_spec, datalines):
2310 """
2311 Parse `datalines` for a field list containing extension options
2312 matching `option_spec`.
2313
2314 :Parameters:
2315 - `option_spec`: a mapping of option name to conversion
2316 function, which should raise an exception on bad input.
2317 - `datalines`: a list of input strings.
2318
2319 :Return:
2320 - Success value, 1 or 0.
2321 - An option dictionary on success, an error string on failure.
2322 """
2323 node = nodes.field_list()
2324 newline_offset, blank_finish = self.nested_list_parse(
2325 datalines, 0, node, initial_state='ExtensionOptions',
2326 blank_finish=True)
2327 if newline_offset != len(datalines): # incomplete parse of block
2328 return 0, 'invalid option block'
2329 try:
2330 options = utils.extract_extension_options(node, option_spec)
2331 except KeyError as detail:
2332 return 0, 'unknown option: "%s"' % detail.args[0]
2333 except (ValueError, TypeError) as detail:
2334 return 0, 'invalid option value: %s' % ' '.join(detail.args)
2335 except utils.ExtensionOptionError as detail:
2336 return 0, 'invalid option data: %s' % ' '.join(detail.args)
2337 if blank_finish:
2338 return 1, options
2339 else:
2340 return 0, 'option data incompletely parsed'
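    # Minimal usage sketch (hypothetical values, for illustration only;
    # assumes ``state`` is a live `Body` instance):
    #
    #     from docutils.parsers.rst import directives
    #     option_spec = {'width': directives.positive_int}
    #     success, data = state.parse_extension_options(option_spec,
    #                                                    [':width: 120'])
    #     # success == 1, data == {'width': 120}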
2341
2342 def unknown_directive(self, type_name):
2343 lineno = self.state_machine.abs_line_number()
2344 (indented, indent, offset, blank_finish
2345 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2346 text = '\n'.join(indented)
2347 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2348 nodes.literal_block(text, text),
2349 line=lineno)
2350 return [error], blank_finish
2351
2352 def comment(self, match):
2353 if self.state_machine.is_next_line_blank():
2354 first_comment_line = match.string[match.end():]
2355 if not first_comment_line.strip(): # empty comment
2356 return [nodes.comment()], True # "A tiny but practical wart."
2357 if first_comment_line.startswith('end of inclusion from "'):
2358 # cf. parsers.rst.directives.misc.Include
2359 self.document.include_log.pop()
2360 return [], True
2361 (indented, indent, offset, blank_finish
2362 ) = self.state_machine.get_first_known_indented(match.end())
2363 while indented and not indented[-1].strip():
2364 indented.trim_end()
2365 text = '\n'.join(indented)
2366 return [nodes.comment(text, text)], blank_finish
2367
2368 explicit.constructs = [
2369 (footnote,
2370 re.compile(r"""
2371 \.\.[ ]+ # explicit markup start
2372 \[
2373 ( # footnote label:
2374 [0-9]+ # manually numbered footnote
2375 | # *OR*
2376 \# # anonymous auto-numbered footnote
2377 | # *OR*
2378                          \#%s                 # auto-numbered footnote with label
2379 | # *OR*
2380 \* # auto-symbol footnote
2381 )
2382 \]
2383 ([ ]+|$) # whitespace or end of line
2384 """ % Inliner.simplename, re.VERBOSE)),
2385 (citation,
2386 re.compile(r"""
2387 \.\.[ ]+ # explicit markup start
2388 \[(%s)\] # citation label
2389 ([ ]+|$) # whitespace or end of line
2390 """ % Inliner.simplename, re.VERBOSE)),
2391 (hyperlink_target,
2392 re.compile(r"""
2393 \.\.[ ]+ # explicit markup start
2394 _ # target indicator
2395 (?![ ]|$) # first char. not space or EOL
2396 """, re.VERBOSE)),
2397 (substitution_def,
2398 re.compile(r"""
2399 \.\.[ ]+ # explicit markup start
2400 \| # substitution indicator
2401 (?![ ]|$) # first char. not space or EOL
2402 """, re.VERBOSE)),
2403 (directive,
2404 re.compile(r"""
2405 \.\.[ ]+ # explicit markup start
2406 (%s) # directive name
2407 [ ]? # optional space
2408 :: # directive delimiter
2409 ([ ]+|$) # whitespace or end of line
2410 """ % Inliner.simplename, re.VERBOSE))]
2411
2412 def explicit_markup(self, match, context, next_state):
2413 """Footnotes, hyperlink targets, directives, comments."""
2414 nodelist, blank_finish = self.explicit_construct(match)
2415 self.parent += nodelist
2416 self.explicit_list(blank_finish)
2417 return [], next_state, []
2418
2419 def explicit_construct(self, match):
2420 """Determine which explicit construct this is, parse & return it."""
2421 errors = []
2422 for method, pattern in self.explicit.constructs:
2423 expmatch = pattern.match(match.string)
2424 if expmatch:
2425 try:
2426 return method(self, expmatch)
2427 except MarkupError as error:
2428 lineno = self.state_machine.abs_line_number()
2429 message = ' '.join(error.args)
2430 errors.append(self.reporter.warning(message, line=lineno))
2431 break
2432 nodelist, blank_finish = self.comment(match)
2433 return nodelist + errors, blank_finish
2434
2435 def explicit_list(self, blank_finish) -> None:
2436 """
2437 Create a nested state machine for a series of explicit markup
2438 constructs (including anonymous hyperlink targets).
2439 """
2440 offset = self.state_machine.line_offset + 1 # next line
2441 newline_offset, blank_finish = self.nested_list_parse(
2442 self.state_machine.input_lines[offset:],
2443 input_offset=self.state_machine.abs_line_offset() + 1,
2444 node=self.parent, initial_state='Explicit',
2445 blank_finish=blank_finish)
2446 self.goto_line(newline_offset)
2447 if not blank_finish:
2448 self.parent += self.unindent_warning('Explicit markup')
2449
2450 def anonymous(self, match, context, next_state):
2451 """Anonymous hyperlink targets."""
2452 nodelist, blank_finish = self.anonymous_target(match)
2453 self.parent += nodelist
2454 self.explicit_list(blank_finish)
2455 return [], next_state, []
2456
2457 def anonymous_target(self, match):
2458 lineno = self.state_machine.abs_line_number()
2459 (block, indent, offset, blank_finish
2460 ) = self.state_machine.get_first_known_indented(match.end(),
2461 until_blank=True)
2462 blocktext = match.string[:match.end()] + '\n'.join(block)
2463 block = [escape2null(line) for line in block]
2464 target = self.make_target(block, blocktext, lineno, '')
2465 return [target], blank_finish
2466
2467 def line(self, match, context, next_state):
2468 """Section title overline or transition marker."""
2469 if self.state_machine.match_titles:
2470 return [match.string], 'Line', []
2471 elif match.string.strip() == '::':
2472 raise statemachine.TransitionCorrection('text')
2473 elif len(match.string.strip()) < 4:
2474 msg = self.reporter.info(
2475 'Unexpected possible title overline or transition.\n'
2476 "Treating it as ordinary text because it's so short.",
2477 line=self.state_machine.abs_line_number())
2478 self.parent += msg
2479 raise statemachine.TransitionCorrection('text')
2480 else:
2481 blocktext = self.state_machine.line
2482 msg = self.reporter.error(
2483 'Unexpected section title or transition.',
2484 nodes.literal_block(blocktext, blocktext),
2485 line=self.state_machine.abs_line_number())
2486 self.parent += msg
2487 return [], next_state, []
2488
2489 def text(self, match, context, next_state):
2490 """Titles, definition lists, paragraphs."""
2491 return [match.string], 'Text', []
2492
2493
2494class RFC2822Body(Body):
2495
2496 """
2497 RFC2822 headers are only valid as the first constructs in documents. As
2498 soon as anything else appears, the `Body` state should take over.
2499 """
2500
2501 patterns = Body.patterns.copy() # can't modify the original
2502 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2503 initial_transitions = [(name, 'Body')
2504 for name in Body.initial_transitions]
2505 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2506
2507 def rfc2822(self, match, context, next_state):
2508 """RFC2822-style field list item."""
2509 fieldlist = nodes.field_list(classes=['rfc2822'])
2510 self.parent += fieldlist
2511 field, blank_finish = self.rfc2822_field(match)
2512 fieldlist += field
2513 offset = self.state_machine.line_offset + 1 # next line
2514 newline_offset, blank_finish = self.nested_list_parse(
2515 self.state_machine.input_lines[offset:],
2516 input_offset=self.state_machine.abs_line_offset() + 1,
2517 node=fieldlist, initial_state='RFC2822List',
2518 blank_finish=blank_finish)
2519 self.goto_line(newline_offset)
2520 if not blank_finish:
2521 self.parent += self.unindent_warning(
2522 'RFC2822-style field list')
2523 return [], next_state, []
2524
2525 def rfc2822_field(self, match):
2526 name = match.string[:match.string.find(':')]
2527 (indented, indent, line_offset, blank_finish
2528 ) = self.state_machine.get_first_known_indented(match.end(),
2529 until_blank=True)
2530 fieldnode = nodes.field()
2531 fieldnode += nodes.field_name(name, name)
2532 fieldbody = nodes.field_body('\n'.join(indented))
2533 fieldnode += fieldbody
2534 if indented:
2535 self.nested_parse(indented, input_offset=line_offset,
2536 node=fieldbody)
2537 return fieldnode, blank_finish
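    # RFC2822-style headers are plain "Name: value" lines at the very start
    # of the document (illustrative example, not in the original source):
    #
    #     Author: David Goodger
    #     Contact: goodger@python.org
    #     Date: 2002-08-18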
2538
2539
2540class SpecializedBody(Body):
2541
2542 """
2543 Superclass for second and subsequent compound element members. Compound
2544 elements are lists and list-like constructs.
2545
2546 All transition methods are disabled (redefined as `invalid_input`).
2547 Override individual methods in subclasses to re-enable.
2548
2549 For example, once an initial bullet list item, say, is recognized, the
2550 `BulletList` subclass takes over, with a "bullet_list" node as its
2551 container. Upon encountering the initial bullet list item, `Body.bullet`
2552 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2553 starts up a nested parsing session with `BulletList` as the initial state.
2554 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2555 as only bullet list items are encountered, they are parsed and inserted
2556 into the container. The first construct which is *not* a bullet list item
2557 triggers the `invalid_input` method, which ends the nested parse and
2558 closes the container. `BulletList` needs to recognize input that is
2559 invalid in the context of a bullet list, which means everything *other
2560 than* bullet list items, so it inherits the transition list created in
2561 `Body`.
2562 """
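    # For example (illustrative, not in the original source), in
    #
    #     - first item     <- `Body.bullet` opens the list, then `BulletList`
    #     - second item    <- parsed by `BulletList.bullet`
    #
    #     A paragraph.     <- triggers `invalid_input`; the list is closed
    #
    # the paragraph ends the nested parse and control returns to `Body`.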
2563
2564 def invalid_input(self, match=None, context=None, next_state=None):
2565 """Not a compound element member. Abort this state machine."""
2566 self.state_machine.previous_line() # back up so parent SM can reassess
2567 raise EOFError
2568
2569 indent = invalid_input
2570 bullet = invalid_input
2571 enumerator = invalid_input
2572 field_marker = invalid_input
2573 option_marker = invalid_input
2574 doctest = invalid_input
2575 line_block = invalid_input
2576 grid_table_top = invalid_input
2577 simple_table_top = invalid_input
2578 explicit_markup = invalid_input
2579 anonymous = invalid_input
2580 line = invalid_input
2581 text = invalid_input
2582
2583
2584class BulletList(SpecializedBody):
2585
2586 """Second and subsequent bullet_list list_items."""
2587
2588 def bullet(self, match, context, next_state):
2589 """Bullet list item."""
2590 if match.string[0] != self.parent['bullet']:
2591 # different bullet: new list
2592 self.invalid_input()
2593 listitem, blank_finish = self.list_item(match.end())
2594 self.parent += listitem
2595 self.blank_finish = blank_finish
2596 return [], next_state, []
2597
2598
2599class DefinitionList(SpecializedBody):
2600
2601 """Second and subsequent definition_list_items."""
2602
2603 def text(self, match, context, next_state):
2604 """Definition lists."""
2605 return [match.string], 'Definition', []
2606
2607
2608class EnumeratedList(SpecializedBody):
2609
2610 """Second and subsequent enumerated_list list_items."""
2611
2612 def enumerator(self, match, context, next_state):
2613 """Enumerated list item."""
2614 format, sequence, text, ordinal = self.parse_enumerator(
2615 match, self.parent['enumtype'])
2616 if (format != self.format
2617 or (sequence != '#' and (sequence != self.parent['enumtype']
2618 or self.auto
2619 or ordinal != (self.lastordinal + 1)))
2620 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2621 # different enumeration: new list
2622 self.invalid_input()
2623 if sequence == '#':
2624 self.auto = 1
2625 listitem, blank_finish = self.list_item(match.end())
2626 self.parent += listitem
2627 self.blank_finish = blank_finish
2628 self.lastordinal = ordinal
2629 return [], next_state, []
2630
2631
2632class FieldList(SpecializedBody):
2633
2634 """Second and subsequent field_list fields."""
2635
2636 def field_marker(self, match, context, next_state):
2637 """Field list field."""
2638 field, blank_finish = self.field(match)
2639 self.parent += field
2640 self.blank_finish = blank_finish
2641 return [], next_state, []
2642
2643
2644class OptionList(SpecializedBody):
2645
2646 """Second and subsequent option_list option_list_items."""
2647
2648 def option_marker(self, match, context, next_state):
2649 """Option list item."""
2650 try:
2651 option_list_item, blank_finish = self.option_list_item(match)
2652 except MarkupError:
2653 self.invalid_input()
2654 self.parent += option_list_item
2655 self.blank_finish = blank_finish
2656 return [], next_state, []
2657
2658
2659class RFC2822List(SpecializedBody, RFC2822Body):
2660
2661 """Second and subsequent RFC2822-style field_list fields."""
2662
2663 patterns = RFC2822Body.patterns
2664 initial_transitions = RFC2822Body.initial_transitions
2665
2666 def rfc2822(self, match, context, next_state):
2667 """RFC2822-style field list item."""
2668 field, blank_finish = self.rfc2822_field(match)
2669 self.parent += field
2670 self.blank_finish = blank_finish
2671 return [], 'RFC2822List', []
2672
2673 blank = SpecializedBody.invalid_input
2674
2675
2676class ExtensionOptions(FieldList):
2677
2678 """
2679 Parse field_list fields for extension options.
2680
2681 No nested parsing is done (including inline markup parsing).
2682 """
2683
2684 def parse_field_body(self, indented, offset, node) -> None:
2685 """Override `Body.parse_field_body` for simpler parsing."""
2686 lines = []
2687 for line in list(indented) + ['']:
2688 if line.strip():
2689 lines.append(line)
2690 elif lines:
2691 text = '\n'.join(lines)
2692 node += nodes.paragraph(text, text)
2693 lines = []
2694
2695
2696class LineBlock(SpecializedBody):
2697
2698 """Second and subsequent lines of a line_block."""
2699
2700 blank = SpecializedBody.invalid_input
2701
2702 def line_block(self, match, context, next_state):
2703 """New line of line block."""
2704 lineno = self.state_machine.abs_line_number()
2705 line, messages, blank_finish = self.line_block_line(match, lineno)
2706 self.parent += line
2707 self.parent.parent += messages
2708 self.blank_finish = blank_finish
2709 return [], next_state, []
2710
2711
2712class Explicit(SpecializedBody):
2713
2714    """Second and subsequent explicit markup constructs."""
2715
2716 def explicit_markup(self, match, context, next_state):
2717 """Footnotes, hyperlink targets, directives, comments."""
2718 nodelist, blank_finish = self.explicit_construct(match)
2719 self.parent += nodelist
2720 self.blank_finish = blank_finish
2721 return [], next_state, []
2722
2723 def anonymous(self, match, context, next_state):
2724 """Anonymous hyperlink targets."""
2725 nodelist, blank_finish = self.anonymous_target(match)
2726 self.parent += nodelist
2727 self.blank_finish = blank_finish
2728 return [], next_state, []
2729
2730 blank = SpecializedBody.invalid_input
2731
2732
2733class SubstitutionDef(Body):
2734
2735 """
2736 Parser for the contents of a substitution_definition element.
2737 """
2738
2739 patterns = {
2740 'embedded_directive': re.compile(r'(%s)::( +|$)'
2741 % Inliner.simplename),
2742 'text': r''}
2743 initial_transitions = ['embedded_directive', 'text']
2744
2745 def embedded_directive(self, match, context, next_state):
2746 nodelist, blank_finish = self.directive(match,
2747 alt=self.parent['names'][0])
2748 self.parent += nodelist
2749 if not self.state_machine.at_eof():
2750 self.blank_finish = blank_finish
2751 raise EOFError
2752
2753 def text(self, match, context, next_state):
2754 if not self.state_machine.at_eof():
2755 self.blank_finish = self.state_machine.is_next_line_blank()
2756 raise EOFError
2757
2758
2759class Text(RSTState):
2760
2761 """
2762 Classifier of second line of a text block.
2763
2764 Could be a paragraph, a definition list item, or a title.
2765 """
2766
2767 patterns = {'underline': Body.patterns['line'],
2768 'text': r''}
2769 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2770
2771 def blank(self, match, context, next_state):
2772 """End of paragraph."""
2773 # NOTE: self.paragraph returns [node, system_message(s)], literalnext
2774 paragraph, literalnext = self.paragraph(
2775 context, self.state_machine.abs_line_number() - 1)
2776 self.parent += paragraph
2777 if literalnext:
2778 self.parent += self.literal_block()
2779 return [], 'Body', []
2780
2781 def eof(self, context):
2782 if context:
2783 self.blank(None, context, None)
2784 return []
2785
2786 def indent(self, match, context, next_state):
2787 """Definition list item."""
2788 dl = nodes.definition_list()
2789 # the definition list starts on the line before the indent:
2790 lineno = self.state_machine.abs_line_number() - 1
2791 dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
2792 dl_item, blank_finish = self.definition_list_item(context)
2793 dl += dl_item
2794 self.parent += dl
2795 offset = self.state_machine.line_offset + 1 # next line
2796 newline_offset, blank_finish = self.nested_list_parse(
2797 self.state_machine.input_lines[offset:],
2798 input_offset=self.state_machine.abs_line_offset() + 1,
2799 node=dl, initial_state='DefinitionList',
2800 blank_finish=blank_finish, blank_finish_state='Definition')
2801 self.goto_line(newline_offset)
2802 if not blank_finish:
2803 self.parent += self.unindent_warning('Definition list')
2804 return [], 'Body', []
2805
2806 def underline(self, match, context, next_state):
2807 """Section title."""
2808 lineno = self.state_machine.abs_line_number()
2809 title = context[0].rstrip()
2810 underline = match.string.rstrip()
2811 source = title + '\n' + underline
2812 messages = []
2813 if column_width(title) > len(underline):
2814 if len(underline) < 4:
2815 if self.state_machine.match_titles:
2816 msg = self.reporter.info(
2817 'Possible title underline, too short for the title.\n'
2818 "Treating it as ordinary text because it's so short.",
2819 line=lineno)
2820 self.parent += msg
2821 raise statemachine.TransitionCorrection('text')
2822 else:
2823 blocktext = context[0] + '\n' + self.state_machine.line
2824 msg = self.reporter.warning(
2825 'Title underline too short.',
2826 nodes.literal_block(blocktext, blocktext),
2827 line=lineno)
2828 messages.append(msg)
2829 if not self.state_machine.match_titles:
2830 blocktext = context[0] + '\n' + self.state_machine.line
2831 # We need get_source_and_line() here to report correctly
2832 src, srcline = self.state_machine.get_source_and_line()
2833 # TODO: why is abs_line_number() == srcline+1
2834 # if the error is in a table (try with test_tables.py)?
2835 # print("get_source_and_line", srcline)
2836 # print("abs_line_number", self.state_machine.abs_line_number())
2837 msg = self.reporter.error(
2838 'Unexpected section title.',
2839 nodes.literal_block(blocktext, blocktext),
2840 source=src, line=srcline)
2841 self.parent += messages
2842 self.parent += msg
2843 return [], next_state, []
2844 style = underline[0]
2845 context[:] = []
2846 self.section(title, source, style, lineno - 1, messages)
2847 return [], next_state, []
2848
2849 def text(self, match, context, next_state):
2850 """Paragraph."""
2851 startline = self.state_machine.abs_line_number() - 1
2852 msg = None
2853 try:
2854 block = self.state_machine.get_text_block(flush_left=True)
2855 except statemachine.UnexpectedIndentationError as err:
2856 block, src, srcline = err.args
2857 msg = self.reporter.error('Unexpected indentation.',
2858 source=src, line=srcline)
2859 lines = context + list(block)
2860 paragraph, literalnext = self.paragraph(lines, startline)
2861 self.parent += paragraph
2862 self.parent += msg
2863 if literalnext:
2864 try:
2865 self.state_machine.next_line()
2866 except EOFError:
2867 pass
2868 self.parent += self.literal_block()
2869 return [], next_state, []
2870
2871 def literal_block(self):
2872 """Return a list of nodes."""
2873 (indented, indent, offset, blank_finish
2874 ) = self.state_machine.get_indented()
2875 while indented and not indented[-1].strip():
2876 indented.trim_end()
2877 if not indented:
2878 return self.quoted_literal_block()
2879 data = '\n'.join(indented)
2880 literal_block = nodes.literal_block(data, data)
2881 (literal_block.source,
2882 literal_block.line) = self.state_machine.get_source_and_line(offset+1)
2883 nodelist = [literal_block]
2884 if not blank_finish:
2885 nodelist.append(self.unindent_warning('Literal block'))
2886 return nodelist
2887
2888 def quoted_literal_block(self):
2889 abs_line_offset = self.state_machine.abs_line_offset()
2890 offset = self.state_machine.line_offset
2891 parent_node = nodes.Element()
2892 new_abs_offset = self.nested_parse(
2893 self.state_machine.input_lines[offset:],
2894 input_offset=abs_line_offset, node=parent_node, match_titles=False,
2895 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2896 'initial_state': 'QuotedLiteralBlock'})
2897 self.goto_line(new_abs_offset)
2898 return parent_node.children
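    # A quoted (unindented) literal block, as delegated above (illustrative
    # example, not in the original source):
    #
    #     John Doe wrote::
    #
    #     >> Great idea!
    #     >
    #     > Why didn't I think of that?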
2899
2900 def definition_list_item(self, termline):
2901 # the parser is already on the second (indented) line:
2902 dd_lineno = self.state_machine.abs_line_number()
2903 dt_lineno = dd_lineno - 1
2904 (indented, indent, line_offset, blank_finish
2905 ) = self.state_machine.get_indented()
2906 dl_item = nodes.definition_list_item(
2907 '\n'.join(termline + list(indented)))
2908 (dl_item.source,
2909 dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
2910 dt_nodes, messages = self.term(termline, dt_lineno)
2911 dl_item += dt_nodes
2912 dd = nodes.definition('', *messages)
2913 dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
2914 dl_item += dd
2915 if termline[0][-2:] == '::':
2916 dd += self.reporter.info(
2917 'Blank line missing before literal block (after the "::")? '
2918 'Interpreted as a definition list item.',
2919 line=dd_lineno)
2920 # TODO: drop a definition if it is an empty comment to allow
2921 # definition list items with several terms?
2922 # https://sourceforge.net/p/docutils/feature-requests/60/
2923 self.nested_parse(indented, input_offset=line_offset, node=dd)
2924 return dl_item, blank_finish
2925
2926 classifier_delimiter = re.compile(' +: +')
2927
2928 def term(self, lines, lineno):
2929 """Return a definition_list's term and optional classifiers."""
2930 assert len(lines) == 1
2931 text_nodes, messages = self.inline_text(lines[0], lineno)
2932 dt = nodes.term(lines[0])
2933 dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
2934 node_list = [dt]
2935 for i in range(len(text_nodes)):
2936 node = text_nodes[i]
2937 if isinstance(node, nodes.Text):
2938 parts = self.classifier_delimiter.split(node)
2939 if len(parts) == 1:
2940 node_list[-1] += node
2941 else:
2942 text = parts[0].rstrip()
2943 textnode = nodes.Text(text)
2944 node_list[-1] += textnode
2945 node_list += [nodes.classifier(unescape(part, True), part)
2946 for part in parts[1:]]
2947 else:
2948 node_list[-1] += node
2949 return node_list, messages
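    # A definition-list term with classifiers, split on " : " above
    # (illustrative example, not in the original source):
    #
    #     term : classifier one : classifier two
    #         Definition paragraph.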
2950
2951
2952class SpecializedText(Text):
2953
2954 """
2955 Superclass for second and subsequent lines of Text-variants.
2956
2957 All transition methods are disabled. Override individual methods in
2958 subclasses to re-enable.
2959 """
2960
2961 def eof(self, context):
2962 """Incomplete construct."""
2963 return []
2964
2965 def invalid_input(self, match=None, context=None, next_state=None):
2966 """Not a compound element member. Abort this state machine."""
2967 raise EOFError
2968
2969 blank = invalid_input
2970 indent = invalid_input
2971 underline = invalid_input
2972 text = invalid_input
2973
2974
2975class Definition(SpecializedText):
2976
2977 """Second line of potential definition_list_item."""
2978
2979 def eof(self, context):
2980 """Not a definition."""
2981 self.state_machine.previous_line(2) # so parent SM can reassess
2982 return []
2983
2984 def indent(self, match, context, next_state):
2985 """Definition list item."""
2986 dl_item, blank_finish = self.definition_list_item(context)
2987 self.parent += dl_item
2988 self.blank_finish = blank_finish
2989 return [], 'DefinitionList', []
2990
2991
2992class Line(SpecializedText):
2993
2994 """
2995 Second line of over- & underlined section title or transition marker.
2996 """
2997
2998 eofcheck = 1 # ignored, will be removed in Docutils 2.0.
2999
3000 def eof(self, context):
3001 """Transition marker at end of section or document."""
3002 marker = context[0].strip()
3003 if len(marker) < 4:
3004 self.state_correction(context)
3005 src, srcline = self.state_machine.get_source_and_line()
3006 # lineno = self.state_machine.abs_line_number() - 1
3007 transition = nodes.transition(rawsource=context[0])
3008 transition.source = src
3009 transition.line = srcline - 1
3010 # transition.line = lineno
3011 self.parent += transition
3012 return []
3013
3014 def blank(self, match, context, next_state):
3015 """Transition marker."""
3016 src, srcline = self.state_machine.get_source_and_line()
3017 marker = context[0].strip()
3018 if len(marker) < 4:
3019 self.state_correction(context)
3020 transition = nodes.transition(rawsource=marker)
3021 transition.source = src
3022 transition.line = srcline - 1
3023 self.parent += transition
3024 return [], 'Body', []
3025
3026 def text(self, match, context, next_state):
3027 """Potential over- & underlined title."""
3028 lineno = self.state_machine.abs_line_number() - 1
3029 overline = context[0]
3030 title = match.string
3031 underline = ''
3032 try:
3033 underline = self.state_machine.next_line()
3034 except EOFError:
3035 blocktext = overline + '\n' + title
3036 if len(overline.rstrip()) < 4:
3037 self.short_overline(context, blocktext, lineno, 2)
3038 else:
3039 msg = self.reporter.error(
3040 'Incomplete section title.',
3041 nodes.literal_block(blocktext, blocktext),
3042 line=lineno)
3043 self.parent += msg
3044 return [], 'Body', []
3045 source = '%s\n%s\n%s' % (overline, title, underline)
3046 overline = overline.rstrip()
3047 underline = underline.rstrip()
3048 if not self.transitions['underline'][0].match(underline):
3049 blocktext = overline + '\n' + title + '\n' + underline
3050 if len(overline.rstrip()) < 4:
3051 self.short_overline(context, blocktext, lineno, 2)
3052 else:
3053 msg = self.reporter.error(
3054 'Missing matching underline for section title overline.',
3055 nodes.literal_block(source, source),
3056 line=lineno)
3057 self.parent += msg
3058 return [], 'Body', []
3059 elif overline != underline:
3060 blocktext = overline + '\n' + title + '\n' + underline
3061 if len(overline.rstrip()) < 4:
3062 self.short_overline(context, blocktext, lineno, 2)
3063 else:
3064 msg = self.reporter.error(
3065 'Title overline & underline mismatch.',
3066 nodes.literal_block(source, source),
3067 line=lineno)
3068 self.parent += msg
3069 return [], 'Body', []
3070 title = title.rstrip()
3071 messages = []
3072 if column_width(title) > len(overline):
3073 blocktext = overline + '\n' + title + '\n' + underline
3074 if len(overline.rstrip()) < 4:
3075 self.short_overline(context, blocktext, lineno, 2)
3076 else:
3077 msg = self.reporter.warning(
3078 'Title overline too short.',
3079 nodes.literal_block(source, source),
3080 line=lineno)
3081 messages.append(msg)
3082 style = (overline[0], underline[0])
3083 self.section(title.lstrip(), source, style, lineno + 1, messages)
3084 return [], 'Body', []
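    # An over- & underlined section title as handled above (illustrative
    # example, not in the original source); overline and underline must
    # match, and a too-short overline only produces a warning:
    #
    #     ===============
    #     Section Title
    #     ===============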
3085
3086 indent = text # indented title
3087
3088 def underline(self, match, context, next_state):
3089 overline = context[0]
3090 blocktext = overline + '\n' + self.state_machine.line
3091 lineno = self.state_machine.abs_line_number() - 1
3092 if len(overline.rstrip()) < 4:
3093 self.short_overline(context, blocktext, lineno, 1)
3094 msg = self.reporter.error(
3095 'Invalid section title or transition marker.',
3096 nodes.literal_block(blocktext, blocktext),
3097 line=lineno)
3098 self.parent += msg
3099 return [], 'Body', []
3100
3101 def short_overline(self, context, blocktext, lineno, lines=1) -> None:
3102 msg = self.reporter.info(
3103 'Possible incomplete section title.\nTreating the overline as '
3104 "ordinary text because it's so short.",
3105 line=lineno)
3106 self.parent += msg
3107 self.state_correction(context, lines)
3108
3109 def state_correction(self, context, lines=1):
3110 self.state_machine.previous_line(lines)
3111 context[:] = []
3112 raise statemachine.StateCorrection('Body', 'text')
3113
3114
3115class QuotedLiteralBlock(RSTState):
3116
3117 """
3118 Nested parse handler for quoted (unindented) literal blocks.
3119
3120 Special-purpose. Not for inclusion in `state_classes`.
3121 """
3122
3123 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
3124 'text': r''}
3125 initial_transitions = ('initial_quoted', 'text')
3126
3127 def __init__(self, state_machine, debug=False) -> None:
3128 RSTState.__init__(self, state_machine, debug)
3129 self.messages = []
3130 self.initial_lineno = None
3131
3132 def blank(self, match, context, next_state):
3133 if context:
3134 raise EOFError
3135 else:
3136 return context, next_state, []
3137
3138 def eof(self, context):
3139 if context:
3140 src, srcline = self.state_machine.get_source_and_line(
3141 self.initial_lineno)
3142 text = '\n'.join(context)
3143 literal_block = nodes.literal_block(text, text)
3144 literal_block.source = src
3145 literal_block.line = srcline
3146 self.parent += literal_block
3147 else:
3148 self.parent += self.reporter.warning(
3149 'Literal block expected; none found.',
3150 line=self.state_machine.abs_line_number()
3151 ) # src not available, statemachine.input_lines is empty
3152 self.state_machine.previous_line()
3153 self.parent += self.messages
3154 return []
3155
3156 def indent(self, match, context, next_state):
3157 assert context, ('QuotedLiteralBlock.indent: context should not '
3158 'be empty!')
3159 self.messages.append(
3160 self.reporter.error('Unexpected indentation.',
3161 line=self.state_machine.abs_line_number()))
3162 self.state_machine.previous_line()
3163 raise EOFError
3164
3165 def initial_quoted(self, match, context, next_state):
3166 """Match arbitrary quote character on the first line only."""
3167 self.remove_transition('initial_quoted')
3168 quote = match.string[0]
3169 pattern = re.compile(re.escape(quote))
3170 # New transition matches consistent quotes only:
3171 self.add_transition('quoted',
3172 (pattern, self.quoted, self.__class__.__name__))
3173 self.initial_lineno = self.state_machine.abs_line_number()
3174 return [match.string], next_state, []
3175
3176 def quoted(self, match, context, next_state):
3177 """Match consistent quotes on subsequent lines."""
3178 context.append(match.string)
3179 return context, next_state, []
3180
3181 def text(self, match, context, next_state):
3182 if context:
3183 self.messages.append(
3184 self.reporter.error('Inconsistent literal block quoting.',
3185 line=self.state_machine.abs_line_number()))
3186 self.state_machine.previous_line()
3187 raise EOFError
3188
3189
3190state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
3191 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
3192 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
3193"""Standard set of State classes used to start `RSTStateMachine`."""