1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
6This is the ``docutils.parsers.rst.states`` module, the core of
7the reStructuredText parser. It defines the following:
8
9:Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
- `BulletList`: Second and subsequent bullet_list list_items.
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: obsolete, use `types.SimpleNamespace`.
30
31:Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
35
36:Functions:
- `escape2null()`: Return a string with escape-backslashes converted to nulls.
- `unescape()`: Return a string with nulls removed or restored to backslashes.
39
40:Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
42
43Parser Overview
44===============
45
46The reStructuredText parser is implemented as a recursive state machine,
47examining its input one line at a time. To understand how the parser works,
48please first become familiar with the `docutils.statemachine` module. In the
49description below, references are made to classes defined in this module;
50please see the individual classes for details.
51
52Parsing proceeds as follows:
53
541. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
58
592. The method associated with the matched transition pattern is called.
60
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
65
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
70
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
73
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
83
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
86
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
90
- Indented: The element is a definition list item, and parsing proceeds
  similarly to step 2.B, using the `Definition` state.
93
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
97
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
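
For orientation, a minimal sketch that drives this parser through the
public `docutils.parsers.rst.Parser` wrapper (the document name and sample
source are placeholders)::

    from docutils.frontend import get_default_settings
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    parser = Parser()
    settings = get_default_settings(Parser)
    document = new_document('<sketch>', settings)
    parser.parse('A *short* test document.', document)
    print(document.pformat())

`Parser.parse()` creates an `RSTStateMachine` and calls its `run()` method
with the input lines and the (initially empty) `document` node.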
101"""
102
103from __future__ import annotations
104
105__docformat__ = 'reStructuredText'
106
107import copy
108import re
109from types import FunctionType, MethodType
110from types import SimpleNamespace as Struct
111
112from docutils import nodes, statemachine, utils
113from docutils import ApplicationError, DataError
114from docutils.statemachine import StateMachineWS, StateWS
115from docutils.nodes import fully_normalize_name as normalize_name
116from docutils.nodes import unescape, whitespace_normalize_name
117import docutils.parsers.rst
118from docutils.parsers.rst import directives, languages, tableparser, roles
119from docutils.utils import escape2null, column_width
120from docutils.utils import punctuation_chars, urischemes
121from docutils.utils import split_escaped_whitespace
122from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
123 RomanNumeral)
124
125TYPE_CHECKING = False
126if TYPE_CHECKING:
127 from docutils.statemachine import StringList
128
129
130class MarkupError(DataError): pass
131class UnknownInterpretedRoleError(DataError): pass
132class InterpretedRoleNotImplementedError(DataError): pass
133class ParserError(ApplicationError): pass
134class MarkupMismatch(Exception): pass
135
136
137class RSTStateMachine(StateMachineWS):
138
139 """
140 reStructuredText's master StateMachine.
141
142 The entry point to reStructuredText parsing is the `run()` method.
143 """
144
145 def run(self, input_lines, document, input_offset=0, match_titles=True,
146 inliner=None) -> None:
147 """
148 Parse `input_lines` and modify the `document` node in place.
149
150 Extend `StateMachineWS.run()`: set up parse-global data and
151 run the StateMachine.
152 """
153 self.language = languages.get_language(
154 document.settings.language_code, document.reporter)
155 self.match_titles = match_titles
156 if inliner is None:
157 inliner = Inliner()
158 inliner.init_customizations(document.settings)
159 # A collection of objects to share with nested parsers.
160 # The attributes `reporter`, `section_level`, and
161 # `section_bubble_up_kludge` will be removed in Docutils 2.0
162 self.memo = Struct(document=document,
163 reporter=document.reporter, # ignored
164 language=self.language,
165 title_styles=[],
166 section_level=0, # ignored
167 section_bubble_up_kludge=False, # ignored
168 inliner=inliner)
169 self.document = document
170 self.attach_observer(document.note_source)
171 self.reporter = self.document.reporter
172 self.node = document
173 results = StateMachineWS.run(self, input_lines, input_offset,
174 input_source=document['source'])
175 assert results == [], 'RSTStateMachine.run() results should be empty!'
176 self.node = self.memo = None # remove unneeded references
177
178
179class NestedStateMachine(StateMachineWS):
180 """
181 StateMachine run from within other StateMachine runs, to parse nested
182 document structures.
183 """
184
185 def run(self, input_lines, input_offset, memo, node, match_titles=True):
186 """
187 Parse `input_lines` and populate `node`.
188
189 Use a separate "title style hierarchy" (changed in Docutils 0.23).
190
191 Extend `StateMachineWS.run()`: set up document-wide data.
192 """
193 self.match_titles = match_titles
194 self.memo = copy.copy(memo)
195 self.document = memo.document
196 self.attach_observer(self.document.note_source)
197 self.language = memo.language
198 self.reporter = self.document.reporter
199 self.node = node
200 if match_titles:
201 # Use a separate section title style hierarchy;
202 # ensure all sections in the `input_lines` are treated as
203 # subsections of the current section by blocking lower
204 # section levels with a style that is impossible in rST:
205 self.memo.title_styles = ['x'] * len(node.section_hierarchy())
206 results = StateMachineWS.run(self, input_lines, input_offset)
207 assert results == [], ('NestedStateMachine.run() results should be '
208 'empty!')
209 return results
210
211
212class RSTState(StateWS):
213
214 """
215 reStructuredText State superclass.
216
217 Contains methods used by all State subclasses.
218 """
219
220 nested_sm = NestedStateMachine
221 nested_sm_cache = []
222
223 def __init__(self, state_machine, debug=False) -> None:
224 self.nested_sm_kwargs = {'state_classes': state_classes,
225 'initial_state': 'Body'}
226 StateWS.__init__(self, state_machine, debug)
227
228 def runtime_init(self) -> None:
229 StateWS.runtime_init(self)
230 memo = self.state_machine.memo
231 self.memo = memo
232 self.document = memo.document
233 self.inliner = memo.inliner
234 self.reporter = self.document.reporter
235 # enable the reporter to determine source and source-line
236 if not hasattr(self.reporter, 'get_source_and_line'):
237 self.reporter.get_source_and_line = self.state_machine.get_source_and_line # noqa:E501
238
239 @property
240 def parent(self) -> nodes.Element | None:
241 return self.state_machine.node
242
243 @parent.setter
244 def parent(self, value: nodes.Element):
245 self.state_machine.node = value
246
247 def goto_line(self, abs_line_offset) -> None:
248 """
249 Jump to input line `abs_line_offset`, ignoring jumps past the end.
250 """
251 try:
252 self.state_machine.goto_line(abs_line_offset)
253 except EOFError:
254 pass
255
256 def no_match(self, context, transitions):
257 """
258 Override `StateWS.no_match` to generate a system message.
259
260 This code should never be run.
261 """
262 self.reporter.severe(
263 'Internal error: no transition pattern match. State: "%s"; '
264 'transitions: %s; context: %s; current line: %r.'
265 % (self.__class__.__name__, transitions, context,
266 self.state_machine.line))
267 return context, None, []
268
269 def bof(self, context):
270 """Called at beginning of file."""
271 return [], []
272
273 def nested_parse(self,
274 block: StringList,
275 input_offset: int,
276 node: nodes.Element,
277 match_titles: bool = False,
278 state_machine_class: StateMachineWS|None = None,
279 state_machine_kwargs: dict|None = None
280 ) -> int:
281 """
282 Parse the input `block` with a nested state-machine rooted at `node`.
283
284 :block:
285 reStructuredText source extract.
286 :input_offset:
287 Line number at start of the block.
288 :node:
289 Base node. All generated nodes will be appended to this node.
290 :match_titles:
291 Allow section titles?
292 A separate section title style hierarchy is used for the nested
293 parsing (all sections are subsections of the current section).
294 The calling code should check whether sections are valid
295 children of the base node and move them or warn otherwise.
296 :state_machine_class:
297 Default: `NestedStateMachine`.
298 :state_machine_kwargs:
299 Keyword arguments for the state-machine instantiation.
300 Default: `self.nested_sm_kwargs`.
301
302 Create a new state-machine instance if required.
303 Return new offset.
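
A typical call from a directive's ``run()`` method, where ``self.state``
is an `RSTState` instance (the container node is a placeholder)::

    node = nodes.container()
    self.state.nested_parse(self.content, self.content_offset, node)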
304 """
305 use_default = 0
306 if state_machine_class is None:
307 state_machine_class = self.nested_sm
308 use_default += 1
309 if state_machine_kwargs is None:
310 state_machine_kwargs = self.nested_sm_kwargs
311 use_default += 1
312 state_machine = None
313 if use_default == 2:
314 try:
315 state_machine = self.nested_sm_cache.pop()
316 except IndexError:
317 pass
318 if not state_machine:
319 state_machine = state_machine_class(
320 debug=self.debug,
321 parent_state_machine=self.state_machine,
322 **state_machine_kwargs)
323 # run the statemachine and populate `node`:
324 block_length = len(block)
325 state_machine.run(block, input_offset, memo=self.memo,
326 node=node, match_titles=match_titles)
327 # clean up
328 if use_default == 2:
329 self.nested_sm_cache.append(state_machine)
330 else:
331 state_machine.unlink()
332 new_offset = state_machine.abs_line_offset()
333 # No `block.parent` implies disconnected -- lines aren't in sync:
334 if block.parent and (len(block) - block_length) != 0:
335 # Adjustment for block if modified in nested parse:
336 self.state_machine.next_line(len(block) - block_length)
337 return new_offset
338
339 def nested_list_parse(self, block, input_offset, node, initial_state,
340 blank_finish,
341 blank_finish_state=None,
342 extra_settings={},
343 match_titles=False,
344 state_machine_class=None,
345 state_machine_kwargs=None):
346 """
347 Parse the input `block` with a nested state-machine rooted at `node`.
348
349 Create a new StateMachine rooted at `node` and run it over the
350 input `block` (see also `nested_parse()`).
351 Also keep track of optional intermediate blank lines and the
352 required final one.
353
354 Return new offset and a boolean indicating whether there was a
355 blank final line.
356 """
357 if state_machine_class is None:
358 state_machine_class = self.nested_sm
359 if state_machine_kwargs is None:
360 state_machine_kwargs = self.nested_sm_kwargs.copy()
361 state_machine_kwargs['initial_state'] = initial_state
362 state_machine = state_machine_class(
363 debug=self.debug,
364 parent_state_machine=self.state_machine,
365 **state_machine_kwargs)
366 if blank_finish_state is None:
367 blank_finish_state = initial_state
368 state_machine.states[blank_finish_state].blank_finish = blank_finish
369 for key, value in extra_settings.items():
370 setattr(state_machine.states[initial_state], key, value)
371 state_machine.run(block, input_offset, memo=self.memo,
372 node=node, match_titles=match_titles)
373 blank_finish = state_machine.states[blank_finish_state].blank_finish
374 state_machine.unlink()
375 return state_machine.abs_line_offset(), blank_finish
376
377 def section(self, title, source, style, lineno, messages) -> None:
378 """Check for a valid subsection and create one if it checks out."""
379 if self.check_subsection(source, style, lineno):
380 self.new_subsection(title, lineno, messages)
381
382 def check_subsection(self, source, style, lineno) -> bool:
383 """
384 Check for a valid subsection header. Update section data in `memo`.
385
386 When a new section is reached that isn't a subsection of the current
387 section, set `self.parent` to the new section's parent section
388 (or the root node if the new section is a top-level section).
389 """
390 title_styles = self.memo.title_styles
391 parent_sections = self.parent.section_hierarchy()
392 # current section level: (0 root, 1 section, 2 subsection, ...)
393 oldlevel = len(parent_sections)
394 # new section level:
395 try: # check for existing title style
396 newlevel = title_styles.index(style) + 1
397 except ValueError: # new title style
398 newlevel = len(title_styles) + 1
399 # The new level must not be deeper than an immediate child
400 # of the current level:
401 if newlevel > oldlevel + 1:
402 styles = ' '.join('/'.join(style) for style in title_styles)
403 self.parent += self.reporter.error(
404 'Inconsistent title style:'
405 f' skip from level {oldlevel} to {newlevel}.',
406 nodes.literal_block('', source),
407 nodes.paragraph('', f'Established title styles: {styles}'),
408 line=lineno)
409 return False
410 # Update parent state:
411 if newlevel > len(title_styles):
412 title_styles.append(style)
413 self.memo.section_level = newlevel
414 if newlevel <= oldlevel:
415 # new section is sibling or higher up in the section hierarchy
416 self.parent = parent_sections[newlevel-1].parent
417 return True
418
419 def title_inconsistent(self, sourcetext, lineno):
420 # Ignored. Will be removed in Docutils 2.0.
421 error = self.reporter.error(
422 'Title level inconsistent:', nodes.literal_block('', sourcetext),
423 line=lineno)
424 return error
425
426 def new_subsection(self, title, lineno, messages):
427 """Append new subsection to document tree."""
428 section_node = nodes.section()
429 self.parent += section_node
430 textnodes, title_messages = self.inline_text(title, lineno)
431 titlenode = nodes.title(title, '', *textnodes)
432 name = normalize_name(titlenode.astext())
433 section_node['names'].append(name)
434 section_node += titlenode
435 section_node += messages
436 section_node += title_messages
437 self.document.note_implicit_target(section_node, section_node)
438 # Update state:
439 self.parent = section_node
440
441 def paragraph(self, lines, lineno):
442 """
443 Return a list (paragraph & messages) & a boolean: literal_block next?
444 """
445 data = '\n'.join(lines).rstrip()
446 if re.search(r'(?<!\\)(\\\\)*::$', data):
447 if len(data) == 2:
448 return [], 1
449 elif data[-3] in ' \n':
450 text = data[:-3].rstrip()
451 else:
452 text = data[:-1]
453 literalnext = 1
454 else:
455 text = data
456 literalnext = 0
457 textnodes, messages = self.inline_text(text, lineno)
458 p = nodes.paragraph(data, '', *textnodes)
459 p.source, p.line = self.state_machine.get_source_and_line(lineno)
460 return [p] + messages, literalnext
461
462 def inline_text(self, text, lineno):
463 """
464 Return 2 lists: nodes (text and inline elements), and system_messages.
465 """
466 nodes, messages = self.inliner.parse(text, lineno,
467 self.memo, self.parent)
468 return nodes, messages
469
470 def unindent_warning(self, node_name):
471 # the actual problem is one line below the current line
472 lineno = self.state_machine.abs_line_number() + 1
473 return self.reporter.warning('%s ends without a blank line; '
474 'unexpected unindent.' % node_name,
475 line=lineno)
476
477
478def build_regexp(definition, compile_patterns=True):
479 """
480 Build, compile and return a regular expression based on `definition`.
481
482 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
483 where "parts" is a list of regular expressions and/or regular
484 expression definitions to be joined into an or-group.
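
For example, a flat definition joins its parts into a single named
or-group::

    build_regexp(('marker', '^', ':', ['foo', 'bar']))
    # equivalent to re.compile('^(?P<marker>foo|bar):')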
485 """
486 name, prefix, suffix, parts = definition
487 part_strings = []
488 for part in parts:
489 if isinstance(part, tuple):
490 part_strings.append(build_regexp(part, None))
491 else:
492 part_strings.append(part)
493 or_group = '|'.join(part_strings)
494 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
495 if compile_patterns:
496 return re.compile(regexp)
497 else:
498 return regexp
499
500
501class Inliner:
502
503 """
504 Parse inline markup; call the `parse()` method.
505 """
506
507 def __init__(self) -> None:
508 self.implicit_dispatch = []
509 """List of (pattern, bound method) tuples, used by
510 `self.implicit_inline`."""
511
512 def init_customizations(self, settings) -> None:
513 # lookahead and look-behind expressions for inline markup rules
514 if getattr(settings, 'character_level_inline_markup', False):
515 start_string_prefix = '(^|(?<!\x00))'
516 end_string_suffix = ''
517 else:
518 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
519 (punctuation_chars.openers,
520 punctuation_chars.delimiters))
521 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
522 (punctuation_chars.closing_delimiters,
523 punctuation_chars.delimiters,
524 punctuation_chars.closers))
525 args = locals().copy()
526 args.update(vars(self.__class__))
527
528 parts = ('initial_inline', start_string_prefix, '',
529 [
530 ('start', '', self.non_whitespace_after, # simple start-strings
531 [r'\*\*', # strong
532 r'\*(?!\*)', # emphasis but not strong
533 r'``', # literal
534 r'_`', # inline internal target
535 r'\|(?!\|)'] # substitution reference
536 ),
537 ('whole', '', end_string_suffix, # whole constructs
538 [ # reference name & end-string
539 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
540 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
541 [r'[0-9]+', # manually numbered
542 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)
543 r'\*', # auto-symbol
544 r'(?P<citationlabel>%s)' % self.simplename, # citation ref
545 ]
546 )
547 ]
548 ),
549 ('backquote', # interpreted text or phrase reference
550 '(?P<role>(:%s:)?)' % self.simplename, # optional role
551 self.non_whitespace_after,
552 ['`(?!`)'] # but not literal
553 )
554 ]
555 )
556 self.start_string_prefix = start_string_prefix
557 self.end_string_suffix = end_string_suffix
558 self.parts = parts
559
560 self.patterns = Struct(
561 initial=build_regexp(parts),
562 emphasis=re.compile(self.non_whitespace_escape_before
563 + r'(\*)' + end_string_suffix),
564 strong=re.compile(self.non_whitespace_escape_before
565 + r'(\*\*)' + end_string_suffix),
566 interpreted_or_phrase_ref=re.compile(
567 r"""
568 %(non_unescaped_whitespace_escape_before)s
569 (
570 `
571 (?P<suffix>
572 (?P<role>:%(simplename)s:)?
573 (?P<refend>__?)?
574 )
575 )
576 %(end_string_suffix)s
577 """ % args, re.VERBOSE),
578 embedded_link=re.compile(
579 r"""
580 (
581 (?:[ \n]+|^) # spaces or beginning of line/string
582 < # open bracket
583 %(non_whitespace_after)s
584 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
585 %(non_whitespace_escape_before)s
586 > # close bracket
587 )
588 $ # end of string
589 """ % args, re.VERBOSE),
590 literal=re.compile(self.non_whitespace_before + '(``)'
591 + end_string_suffix),
592 target=re.compile(self.non_whitespace_escape_before
593 + r'(`)' + end_string_suffix),
594 substitution_ref=re.compile(self.non_whitespace_escape_before
595 + r'(\|_{0,2})'
596 + end_string_suffix),
597 email=re.compile(self.email_pattern % args + '$',
598 re.VERBOSE),
599 uri=re.compile(
600 (r"""
601 %(start_string_prefix)s
602 (?P<whole>
603 (?P<absolute> # absolute URI
604 (?P<scheme> # scheme (http, ftp, mailto)
605 [a-zA-Z][a-zA-Z0-9.+-]*
606 )
607 :
608 (
609 ( # either:
610 (//?)? # hierarchical URI
611 %(uric)s* # URI characters
612 %(uri_end)s # final URI char
613 )
614 ( # optional query
615 \?%(uric)s*
616 %(uri_end)s
617 )?
618 ( # optional fragment
619 \#%(uric)s*
620 %(uri_end)s
621 )?
622 )
623 )
624 | # *OR*
625 (?P<email> # email address
626 """ + self.email_pattern + r"""
627 )
628 )
629 %(end_string_suffix)s
630 """) % args, re.VERBOSE),
631 pep=re.compile(
632 r"""
633 %(start_string_prefix)s
634 (
635 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
636 |
637 (PEP\s+(?P<pepnum2>\d+)) # reference by name
638 )
639 %(end_string_suffix)s""" % args, re.VERBOSE),
640 rfc=re.compile(
641 r"""
642 %(start_string_prefix)s
643 (RFC(-|\s+)?(?P<rfcnum>\d+))
644 %(end_string_suffix)s""" % args, re.VERBOSE))
645
646 self.implicit_dispatch.append((self.patterns.uri,
647 self.standalone_uri))
648 if settings.pep_references:
649 self.implicit_dispatch.append((self.patterns.pep,
650 self.pep_reference))
651 if settings.rfc_references:
652 self.implicit_dispatch.append((self.patterns.rfc,
653 self.rfc_reference))
654
655 def parse(self, text, lineno, memo, parent):
656 # Needs to be refactored for nested inline markup.
657 # Add nested_parse() method?
658 """
659 Return 2 lists: nodes (text and inline elements), and system_messages.
660
661 Using `self.patterns.initial`, a pattern which matches start-strings
662 (emphasis, strong, interpreted, phrase reference, literal,
663 substitution reference, and inline target) and complete constructs
664 (simple reference, footnote reference), search for a candidate. When
665 one is found, check for validity (e.g., not a quoted '*' character).
666 If valid, search for the corresponding end string if applicable, and
667 check it for validity. If not found or invalid, generate a warning
668 and ignore the start-string. Implicit inline markup (e.g. standalone
669 URIs) is found last.
670
671 :text: source string
672 :lineno: absolute line number, cf. `statemachine.get_source_and_line()`
673 """
674 self.document = memo.document
675 self.language = memo.language
676 self.reporter = self.document.reporter
677 self.parent = parent
678 pattern_search = self.patterns.initial.search
679 dispatch = self.dispatch
680 remaining = escape2null(text)
681 processed = []
682 unprocessed = []
683 messages = []
684 while remaining:
685 match = pattern_search(remaining)
686 if match:
687 groups = match.groupdict()
688 method = dispatch[groups['start'] or groups['backquote']
689 or groups['refend'] or groups['fnend']]
690 before, inlines, remaining, sysmessages = method(self, match,
691 lineno)
692 unprocessed.append(before)
693 messages += sysmessages
694 if inlines:
695 processed += self.implicit_inline(''.join(unprocessed),
696 lineno)
697 processed += inlines
698 unprocessed = []
699 else:
700 break
701 remaining = ''.join(unprocessed) + remaining
702 if remaining:
703 processed += self.implicit_inline(remaining, lineno)
704 return processed, messages
705
706 # Inline object recognition
707 # -------------------------
708 # See also init_customizations().
709 non_whitespace_before = r'(?<!\s)'
710 non_whitespace_escape_before = r'(?<![\s\x00])'
711 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
712 non_whitespace_after = r'(?!\s)'
713 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
714 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
715 # Valid URI characters (see RFC 2396 & RFC 2732);
716 # final \x00 allows backslash escapes in URIs:
717 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
718 # Delimiter indicating the end of a URI (not part of the URI):
719 uri_end_delim = r"""[>]"""
720 # Last URI character; same as uric but no punctuation:
721 urilast = r"""[_~*/=+a-zA-Z0-9]"""
722 # End of a URI (either 'urilast' or 'uric followed by a
723 # uri_end_delim'):
724 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
725 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
726 email_pattern = r"""
727 %(emailc)s+(?:\.%(emailc)s+)* # name
728 (?<!\x00)@ # at
729 %(emailc)s+(?:\.%(emailc)s*)* # host
730 %(uri_end)s # final URI char
731 """
732
733 def quoted_start(self, match):
734 """Test if inline markup start-string is 'quoted'.
735
736 'Quoted' in this context means the start-string is enclosed in a pair
737 of matching opening/closing delimiters (not necessarily quotes)
738 or at the end of the match.
739 """
740 string = match.string
741 start = match.start()
742 if start == 0: # start-string at beginning of text
743 return False
744 prestart = string[start - 1]
745 try:
746 poststart = string[match.end()]
747 except IndexError: # start-string at end of text
748 return True # not "quoted" but no markup start-string either
749 return punctuation_chars.match_chars(prestart, poststart)
750
751 def inline_obj(self, match, lineno, end_pattern, nodeclass,
752 restore_backslashes=False):
753 string = match.string
754 matchstart = match.start('start')
755 matchend = match.end('start')
756 if self.quoted_start(match):
757 return string[:matchend], [], string[matchend:], [], ''
758 endmatch = end_pattern.search(string[matchend:])
759 if endmatch and endmatch.start(1): # 1 or more chars
760 text = endmatch.string[:endmatch.start(1)]
761 if restore_backslashes:
762 text = unescape(text, True)
763 textend = matchend + endmatch.end(1)
764 rawsource = unescape(string[matchstart:textend], True)
765 node = nodeclass(rawsource, text)
766 return (string[:matchstart], [node],
767 string[textend:], [], endmatch.group(1))
768 msg = self.reporter.warning(
769 'Inline %s start-string without end-string.'
770 % nodeclass.__name__, line=lineno)
771 text = unescape(string[matchstart:matchend], True)
772 prb = self.problematic(text, text, msg)
773 return string[:matchstart], [prb], string[matchend:], [msg], ''
774
775 def problematic(self, text, rawsource, message):
776 msgid = self.document.set_id(message, self.parent)
777 problematic = nodes.problematic(rawsource, text, refid=msgid)
778 prbid = self.document.set_id(problematic)
779 message.add_backref(prbid)
780 return problematic
781
782 def emphasis(self, match, lineno):
783 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
784 match, lineno, self.patterns.emphasis, nodes.emphasis)
785 return before, inlines, remaining, sysmessages
786
787 def strong(self, match, lineno):
788 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
789 match, lineno, self.patterns.strong, nodes.strong)
790 return before, inlines, remaining, sysmessages
791
792 def interpreted_or_phrase_ref(self, match, lineno):
793 end_pattern = self.patterns.interpreted_or_phrase_ref
794 string = match.string
795 matchstart = match.start('backquote')
796 matchend = match.end('backquote')
797 rolestart = match.start('role')
798 role = match.group('role')
799 position = ''
800 if role:
801 role = role[1:-1]
802 position = 'prefix'
803 elif self.quoted_start(match):
804 return string[:matchend], [], string[matchend:], []
805 endmatch = end_pattern.search(string[matchend:])
806 if endmatch and endmatch.start(1): # 1 or more chars
807 textend = matchend + endmatch.end()
808 if endmatch.group('role'):
809 if role:
810 msg = self.reporter.warning(
811 'Multiple roles in interpreted text (both '
812 'prefix and suffix present; only one allowed).',
813 line=lineno)
814 text = unescape(string[rolestart:textend], True)
815 prb = self.problematic(text, text, msg)
816 return string[:rolestart], [prb], string[textend:], [msg]
817 role = endmatch.group('suffix')[1:-1]
818 position = 'suffix'
819 escaped = endmatch.string[:endmatch.start(1)]
820 rawsource = unescape(string[matchstart:textend], True)
821 if rawsource[-1:] == '_':
822 if role:
823 msg = self.reporter.warning(
824 'Mismatch: both interpreted text role %s and '
825 'reference suffix.' % position, line=lineno)
826 text = unescape(string[rolestart:textend], True)
827 prb = self.problematic(text, text, msg)
828 return string[:rolestart], [prb], string[textend:], [msg]
829 return self.phrase_ref(string[:matchstart], string[textend:],
830 rawsource, escaped)
831 else:
832 rawsource = unescape(string[rolestart:textend], True)
833 nodelist, messages = self.interpreted(rawsource, escaped, role,
834 lineno)
835 return (string[:rolestart], nodelist,
836 string[textend:], messages)
837 msg = self.reporter.warning(
838 'Inline interpreted text or phrase reference start-string '
839 'without end-string.', line=lineno)
840 text = unescape(string[matchstart:matchend], True)
841 prb = self.problematic(text, text, msg)
842 return string[:matchstart], [prb], string[matchend:], [msg]
843
844 def phrase_ref(self, before, after, rawsource, escaped, text=None):
845 # `text` is ignored (since 0.16)
846 match = self.patterns.embedded_link.search(escaped)
847 if match: # embedded <URI> or <alias_>
848 text = escaped[:match.start(0)]
849 unescaped = unescape(text)
850 rawtext = unescape(text, True)
851 aliastext = match.group(2)
852 rawaliastext = unescape(aliastext, True)
853 underscore_escaped = rawaliastext.endswith(r'\_')
854 if (aliastext.endswith('_')
855 and not (underscore_escaped
856 or self.patterns.uri.match(aliastext))):
857 aliastype = 'name'
858 alias = normalize_name(unescape(aliastext[:-1]))
859 target = nodes.target(match.group(1), refname=alias)
860 target.indirect_reference_name = whitespace_normalize_name(
861 unescape(aliastext[:-1]))
862 else:
863 aliastype = 'uri'
864 # remove unescaped whitespace
865 alias_parts = split_escaped_whitespace(match.group(2))
866 alias = ' '.join(''.join(part.split())
867 for part in alias_parts)
868 alias = self.adjust_uri(unescape(alias))
869 if alias.endswith(r'\_'):
870 alias = alias[:-2] + '_'
871 target = nodes.target(match.group(1), refuri=alias)
872 target.referenced = 1
873 if not aliastext:
874 raise ApplicationError('problem with embedded link: %r'
875 % aliastext)
876 if not text:
877 text = alias
878 unescaped = unescape(text)
879 rawtext = rawaliastext
880 else:
881 text = escaped
882 unescaped = unescape(text)
883 target = None
884 rawtext = unescape(escaped, True)
885
886 refname = normalize_name(unescaped)
887 reference = nodes.reference(rawsource, text,
888 name=whitespace_normalize_name(unescaped))
889 reference[0].rawsource = rawtext
890
891 node_list = [reference]
892
893 if rawsource[-2:] == '__':
894 if target and (aliastype == 'name'):
895 reference['refname'] = alias
896 self.document.note_refname(reference)
897 # self.document.note_indirect_target(target) # required?
898 elif target and (aliastype == 'uri'):
899 reference['refuri'] = alias
900 else:
901 reference['anonymous'] = True
902 else:
903 if target:
904 target['names'].append(refname)
905 if aliastype == 'name':
906 reference['refname'] = alias
907 self.document.note_indirect_target(target)
908 self.document.note_refname(reference)
909 else:
910 reference['refuri'] = alias
911 # target.note_referenced_by(name=refname)
912 self.document.note_implicit_target(target, self.parent)
913 node_list.append(target)
914 else:
915 reference['refname'] = refname
916 self.document.note_refname(reference)
917 return before, node_list, after, []
918
919 def adjust_uri(self, uri):
920 match = self.patterns.email.match(uri)
921 if match:
922 return 'mailto:' + uri
923 else:
924 return uri
925
926 def interpreted(self, rawsource, text, role, lineno):
927 role_fn, messages = roles.role(role, self.language, lineno,
928 self.reporter)
929 if role_fn:
930 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
931 return nodes, messages + messages2
932 else:
933 msg = self.reporter.error(
934 'Unknown interpreted text role "%s".' % role,
935 line=lineno)
936 return ([self.problematic(rawsource, rawsource, msg)],
937 messages + [msg])
938
939 def literal(self, match, lineno):
940 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
941 match, lineno, self.patterns.literal, nodes.literal,
942 restore_backslashes=True)
943 return before, inlines, remaining, sysmessages
944
945 def inline_internal_target(self, match, lineno):
946 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
947 match, lineno, self.patterns.target, nodes.target)
948 if inlines and isinstance(inlines[0], nodes.target):
949 assert len(inlines) == 1
950 target = inlines[0]
951 name = normalize_name(target.astext())
952 target['names'].append(name)
953 self.document.note_explicit_target(target, self.parent)
954 return before, inlines, remaining, sysmessages
955
956 def substitution_reference(self, match, lineno):
957 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
958 match, lineno, self.patterns.substitution_ref,
959 nodes.substitution_reference)
960 if len(inlines) == 1:
961 subref_node = inlines[0]
962 if isinstance(subref_node, nodes.substitution_reference):
963 subref_text = subref_node.astext()
964 self.document.note_substitution_ref(subref_node, subref_text)
965 if endstring[-1:] == '_':
966 reference_node = nodes.reference(
967 '|%s%s' % (subref_text, endstring), '')
968 if endstring[-2:] == '__':
969 reference_node['anonymous'] = True
970 else:
971 reference_node['refname'] = normalize_name(subref_text)
972 self.document.note_refname(reference_node)
973 reference_node += subref_node
974 inlines = [reference_node]
975 return before, inlines, remaining, sysmessages
976
977 def footnote_reference(self, match, lineno):
978 """
979 Handles `nodes.footnote_reference` and `nodes.citation_reference`
980 elements.
981 """
982 label = match.group('footnotelabel')
983 refname = normalize_name(label)
984 string = match.string
985 before = string[:match.start('whole')]
986 remaining = string[match.end('whole'):]
987 if match.group('citationlabel'):
988 refnode = nodes.citation_reference('[%s]_' % label,
989 refname=refname)
990 refnode += nodes.Text(label)
991 self.document.note_citation_ref(refnode)
992 else:
993 refnode = nodes.footnote_reference('[%s]_' % label)
994 if refname[0] == '#':
995 refname = refname[1:]
996 refnode['auto'] = 1
997 self.document.note_autofootnote_ref(refnode)
998 elif refname == '*':
999 refname = ''
1000 refnode['auto'] = '*'
1001 self.document.note_symbol_footnote_ref(
1002 refnode)
1003 else:
1004 refnode += nodes.Text(label)
1005 if refname:
1006 refnode['refname'] = refname
1007 self.document.note_footnote_ref(refnode)
1008 if utils.get_trim_footnote_ref_space(self.document.settings):
1009 before = before.rstrip()
1010 return before, [refnode], remaining, []
1011
1012 def reference(self, match, lineno, anonymous=False):
1013 referencename = match.group('refname')
1014 refname = normalize_name(referencename)
1015 referencenode = nodes.reference(
1016 referencename + match.group('refend'), referencename,
1017 name=whitespace_normalize_name(referencename))
1018 referencenode[0].rawsource = referencename
1019 if anonymous:
1020 referencenode['anonymous'] = True
1021 else:
1022 referencenode['refname'] = refname
1023 self.document.note_refname(referencenode)
1024 string = match.string
1025 matchstart = match.start('whole')
1026 matchend = match.end('whole')
1027 return string[:matchstart], [referencenode], string[matchend:], []
1028
1029 def anonymous_reference(self, match, lineno):
1030 return self.reference(match, lineno, anonymous=True)
1031
1032 def standalone_uri(self, match, lineno):
1033 if (not match.group('scheme')
1034 or match.group('scheme').lower() in urischemes.schemes):
1035 if match.group('email'):
1036 addscheme = 'mailto:'
1037 else:
1038 addscheme = ''
1039 text = match.group('whole')
1040 refuri = addscheme + unescape(text)
1041 reference = nodes.reference(unescape(text, True), text,
1042 refuri=refuri)
1043 return [reference]
1044 else: # not a valid scheme
1045 raise MarkupMismatch
1046
1047 def pep_reference(self, match, lineno):
1048 text = match.group(0)
1049 if text.startswith('pep-'):
1050 pepnum = int(unescape(match.group('pepnum1')))
1051 elif text.startswith('PEP'):
1052 pepnum = int(unescape(match.group('pepnum2')))
1053 else:
1054 raise MarkupMismatch
1055 ref = (self.document.settings.pep_base_url
1056 + self.document.settings.pep_file_url_template % pepnum)
1057 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1058
1059 rfc_url = 'rfc%d.html'
1060
1061 def rfc_reference(self, match, lineno):
1062 text = match.group(0)
1063 if text.startswith('RFC'):
1064 rfcnum = int(unescape(match.group('rfcnum')))
1065 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
1066 else:
1067 raise MarkupMismatch
1068 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1069
1070 def implicit_inline(self, text, lineno):
1071 """
1072 Check each of the patterns in `self.implicit_dispatch` for a match,
1073 and dispatch to the stored method for the pattern. Recursively check
1074 the text before and after the match. Return a list of `nodes.Text`
1075 and inline element nodes.
1076 """
1077 if not text:
1078 return []
1079 for pattern, method in self.implicit_dispatch:
1080 match = pattern.search(text)
1081 if match:
1082 try:
1083 # Must recurse on strings before *and* after the match;
1084 # there may be multiple patterns.
1085 return (self.implicit_inline(text[:match.start()], lineno)
1086 + method(match, lineno)
1087 + self.implicit_inline(text[match.end():], lineno))
1088 except MarkupMismatch:
1089 pass
1090 return [nodes.Text(text)]
1091
1092 dispatch = {'*': emphasis,
1093 '**': strong,
1094 '`': interpreted_or_phrase_ref,
1095 '``': literal,
1096 '_`': inline_internal_target,
1097 ']_': footnote_reference,
1098 '|': substitution_reference,
1099 '_': reference,
1100 '__': anonymous_reference}
1101
1102
1103def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1104 return ord(s) - _zero
1105
1106
1107def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1108 return ord(s) - _zero
1109
1110
1111class Body(RSTState):
1112
1113 """
1114 Generic classifier of the first line of a block.
1115 """
1116
1117 double_width_pad_char = tableparser.TableParser.double_width_pad_char
1118 """Padding character for East Asian double-width text."""
1119
1120 enum = Struct()
1121 """Enumerated list parsing information."""
1122
1123 enum.formatinfo = {
1124 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1125 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1126 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1127 enum.formats = enum.formatinfo.keys()
1128 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1129 'lowerroman', 'upperroman'] # ORDERED!
1130 enum.sequencepats = {'arabic': '[0-9]+',
1131 'loweralpha': '[a-z]',
1132 'upperalpha': '[A-Z]',
1133 'lowerroman': '[ivxlcdm]+',
1134 'upperroman': '[IVXLCDM]+'}
1135 enum.converters = {'arabic': int,
1136 'loweralpha': _loweralpha_to_int,
1137 'upperalpha': _upperalpha_to_int,
1138 'lowerroman': RomanNumeral.from_string,
1139 'upperroman': RomanNumeral.from_string}
1140
1141 enum.sequenceregexps = {}
1142 for sequence in enum.sequences:
1143 enum.sequenceregexps[sequence] = re.compile(
1144 enum.sequencepats[sequence] + '$')
1145
1146 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1147 """Matches the top (& bottom) of a full table)."""
1148
1149 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1150 """Matches the top of a simple table."""
1151
1152 simple_table_border_pat = re.compile('=+[ =]*$')
1153 """Matches the bottom & header bottom of a simple table."""
1154
1155 pats = {}
1156 """Fragments of patterns used by transitions."""
1157
1158 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1159 pats['alpha'] = '[a-zA-Z]'
1160 pats['alphanum'] = '[a-zA-Z0-9]'
1161 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1162 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1163 '|%(upperroman)s|#)' % enum.sequencepats)
1164 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1165 # @@@ Loosen up the pattern? Allow Unicode?
1166 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1167 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1168 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1169 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1170
1171 for format in enum.formats:
1172 pats[format] = '(?P<%s>%s%s%s)' % (
1173 format, re.escape(enum.formatinfo[format].prefix),
1174 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1175
1176 patterns = {
1177 'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
1178 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1179 'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
1180 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1181 'doctest': r'>>>( +|$)',
1182 'line_block': r'\|( +|$)',
1183 'grid_table_top': grid_table_top_pat,
1184 'simple_table_top': simple_table_top_pat,
1185 'explicit_markup': r'\.\.( +|$)',
1186 'anonymous': r'__( +|$)',
1187 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1188 'text': r''}
1189 initial_transitions = (
1190 'bullet',
1191 'enumerator',
1192 'field_marker',
1193 'option_marker',
1194 'doctest',
1195 'line_block',
1196 'grid_table_top',
1197 'simple_table_top',
1198 'explicit_markup',
1199 'anonymous',
1200 'line',
1201 'text')
1202
1203 def indent(self, match, context, next_state):
1204 """Block quote."""
1205 (indented, indent, line_offset, blank_finish
1206 ) = self.state_machine.get_indented()
1207 elements = self.block_quote(indented, line_offset)
1208 self.parent += elements
1209 if not blank_finish:
1210 self.parent += self.unindent_warning('Block quote')
1211 return context, next_state, []
1212
1213 def block_quote(self, indented, line_offset):
1214 elements = []
1215 while indented:
1216 blockquote = nodes.block_quote(rawsource='\n'.join(indented))
1217 (blockquote.source, blockquote.line
1218 ) = self.state_machine.get_source_and_line(line_offset+1)
1219 (blockquote_lines,
1220 attribution_lines,
1221 attribution_offset,
1222 indented,
1223 new_line_offset) = self.split_attribution(indented, line_offset)
1224 self.nested_parse(blockquote_lines, line_offset, blockquote)
1225 elements.append(blockquote)
1226 if attribution_lines:
1227 attribution, messages = self.parse_attribution(
1228 attribution_lines, line_offset+attribution_offset)
1229 blockquote += attribution
1230 elements += messages
1231 line_offset = new_line_offset
1232 while indented and not indented[0]:
1233 indented = indented[1:]
1234 line_offset += 1
1235 return elements
1236
1237 # U+2014 is an em-dash:
1238 attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
1239
1240 def split_attribution(self, indented, line_offset):
1241 """
1242 Check for a block quote attribution and split it off:
1243
1244 * First line after a blank line must begin with a dash ("--", "---",
1245 em-dash; matches `self.attribution_pattern`).
1246 * Every line after that must have consistent indentation.
1247 * Attributions must be preceded by block quote content.
1248
1249 Return a tuple of: (block quote content lines, attribution lines,
1250 attribution offset, remaining indented lines, remaining lines offset).
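
For example, in the indented block::

    A quotation, provided as block quote content.

    -- Attribution Name

the last line is split off as the attribution.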
1251 """
1252 blank = None
1253 nonblank_seen = False
1254 for i in range(len(indented)):
1255 line = indented[i].rstrip()
1256 if line:
1257 if nonblank_seen and blank == i - 1: # last line blank
1258 match = self.attribution_pattern.match(line)
1259 if match:
1260 attribution_end, indent = self.check_attribution(
1261 indented, i)
1262 if attribution_end:
1263 a_lines = indented[i:attribution_end]
1264 a_lines.trim_left(match.end(), end=1)
1265 a_lines.trim_left(indent, start=1)
1266 return (indented[:i], a_lines,
1267 i, indented[attribution_end:],
1268 line_offset + attribution_end)
1269 nonblank_seen = True
1270 else:
1271 blank = i
1272 else:
1273 return indented, None, None, None, None
1274
1275 def check_attribution(self, indented, attribution_start):
1276 """
1277 Check attribution shape.
1278 Return the index past the end of the attribution, and the indent.
1279 """
1280 indent = None
1281 i = attribution_start + 1
1282 for i in range(attribution_start + 1, len(indented)):
1283 line = indented[i].rstrip()
1284 if not line:
1285 break
1286 if indent is None:
1287 indent = len(line) - len(line.lstrip())
1288 elif len(line) - len(line.lstrip()) != indent:
1289 return None, None # bad shape; not an attribution
1290 else:
1291 # return index of line after last attribution line:
1292 i += 1
1293 return i, (indent or 0)
1294
1295 def parse_attribution(self, indented, line_offset):
1296 text = '\n'.join(indented).rstrip()
1297 lineno = 1 + line_offset # line_offset is zero-based
1298 textnodes, messages = self.inline_text(text, lineno)
1299 node = nodes.attribution(text, '', *textnodes)
1300 node.source, node.line = self.state_machine.get_source_and_line(lineno)
1301 return node, messages
1302
1303 def bullet(self, match, context, next_state):
1304 """Bullet list item."""
1305 ul = nodes.bullet_list()
1306 ul.source, ul.line = self.state_machine.get_source_and_line()
1307 self.parent += ul
1308 ul['bullet'] = match.string[0]
1309 i, blank_finish = self.list_item(match.end())
1310 ul += i
1311 offset = self.state_machine.line_offset + 1 # next line
1312 new_line_offset, blank_finish = self.nested_list_parse(
1313 self.state_machine.input_lines[offset:],
1314 input_offset=self.state_machine.abs_line_offset() + 1,
1315 node=ul, initial_state='BulletList',
1316 blank_finish=blank_finish)
1317 self.goto_line(new_line_offset)
1318 if not blank_finish:
1319 self.parent += self.unindent_warning('Bullet list')
1320 return [], next_state, []
1321
1322 def list_item(self, indent):
1323 src, srcline = self.state_machine.get_source_and_line()
1324 if self.state_machine.line[indent:]:
1325 indented, line_offset, blank_finish = (
1326 self.state_machine.get_known_indented(indent))
1327 else:
1328 indented, indent, line_offset, blank_finish = (
1329 self.state_machine.get_first_known_indented(indent))
1330 listitem = nodes.list_item('\n'.join(indented))
1331 listitem.source, listitem.line = src, srcline
1332 if indented:
1333 self.nested_parse(indented, input_offset=line_offset,
1334 node=listitem)
1335 return listitem, blank_finish
1336
1337 def enumerator(self, match, context, next_state):
1338 """Enumerated List Item"""
1339 format, sequence, text, ordinal = self.parse_enumerator(match)
1340 if not self.is_enumerated_list_item(ordinal, sequence, format):
1341 raise statemachine.TransitionCorrection('text')
1342 enumlist = nodes.enumerated_list()
1343 (enumlist.source,
1344 enumlist.line) = self.state_machine.get_source_and_line()
1345 self.parent += enumlist
1346 if sequence == '#':
1347 enumlist['enumtype'] = 'arabic'
1348 else:
1349 enumlist['enumtype'] = sequence
1350 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1351 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1352 if ordinal != 1:
1353 enumlist['start'] = ordinal
1354 msg = self.reporter.info(
1355 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1356 % (text, ordinal), base_node=enumlist)
1357 self.parent += msg
1358 listitem, blank_finish = self.list_item(match.end())
1359 enumlist += listitem
1360 offset = self.state_machine.line_offset + 1 # next line
1361 newline_offset, blank_finish = self.nested_list_parse(
1362 self.state_machine.input_lines[offset:],
1363 input_offset=self.state_machine.abs_line_offset() + 1,
1364 node=enumlist, initial_state='EnumeratedList',
1365 blank_finish=blank_finish,
1366 extra_settings={'lastordinal': ordinal,
1367 'format': format,
1368 'auto': sequence == '#'})
1369 self.goto_line(newline_offset)
1370 if not blank_finish:
1371 self.parent += self.unindent_warning('Enumerated list')
1372 return [], next_state, []
1373
1374 def parse_enumerator(self, match, expected_sequence=None):
1375 """
1376 Analyze an enumerator and return the results.
1377
1378 :Return:
1379 - the enumerator format ('period', 'parens', or 'rparen'),
1380 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1381 - the text of the enumerator, stripped of formatting, and
1382 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1383 ``None`` is returned for invalid enumerator text).
1384
1385 The enumerator format has already been determined by the regular
1386 expression match. If `expected_sequence` is given, that sequence is
1387 tried first. If not, we check for Roman numeral 1. This way,
1388 single-character Roman numerals (which are also alphabetical) can be
1389 matched. If no sequence has been matched, all sequences are checked in
1390 order.
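
For example, a match of the enumerator ``3.`` yields
``('period', 'arabic', '3', 3)``, and a match of ``(a)`` yields
``('parens', 'loweralpha', 'a', 1)``.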
1391 """
1392 groupdict = match.groupdict()
1393 sequence = ''
1394 for format in self.enum.formats:
1395 if groupdict[format]: # was this the format matched?
1396 break # yes; keep `format`
1397 else: # shouldn't happen
1398 raise ParserError('enumerator format not matched')
1399 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1400 : self.enum.formatinfo[format].end]
1401 if text == '#':
1402 sequence = '#'
1403 elif expected_sequence:
1404 try:
1405 if self.enum.sequenceregexps[expected_sequence].match(text):
1406 sequence = expected_sequence
1407 except KeyError: # shouldn't happen
1408 raise ParserError('unknown enumerator sequence: %s'
1409 % sequence)
1410 elif text == 'i':
1411 sequence = 'lowerroman'
1412 elif text == 'I':
1413 sequence = 'upperroman'
1414 if not sequence:
1415 for sequence in self.enum.sequences:
1416 if self.enum.sequenceregexps[sequence].match(text):
1417 break
1418 else: # shouldn't happen
1419 raise ParserError('enumerator sequence not matched')
1420 if sequence == '#':
1421 ordinal = 1
1422 else:
1423 try:
1424 ordinal = int(self.enum.converters[sequence](text))
1425 except InvalidRomanNumeralError:
1426 ordinal = None
1427 return format, sequence, text, ordinal
1428
1429 def is_enumerated_list_item(self, ordinal, sequence, format):
1430 """
1431 Check validity based on the ordinal value and the second line.
1432
1433 Return true if the ordinal is valid and the second line is blank,
1434 indented, or starts with the next enumerator or an auto-enumerator.
1435 """
1436 if ordinal is None:
1437 return None
1438 try:
1439 next_line = self.state_machine.next_line()
1440 except EOFError: # end of input lines
1441 self.state_machine.previous_line()
1442 return 1
1443 else:
1444 self.state_machine.previous_line()
1445 if not next_line[:1].strip(): # blank or indented
1446 return 1
1447 result = self.make_enumerator(ordinal + 1, sequence, format)
1448 if result:
1449 next_enumerator, auto_enumerator = result
1450 try:
1451 if next_line.startswith((next_enumerator, auto_enumerator)):
1452 return 1
1453 except TypeError:
1454 pass
1455 return None
1456
1457 def make_enumerator(self, ordinal, sequence, format):
1458 """
1459 Construct and return the next enumerated list item marker, and an
1460 auto-enumerator ("#" instead of the regular enumerator).
1461
1462 Return ``None`` for invalid (out of range) ordinals.
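
For example, ``make_enumerator(3, 'loweralpha', 'parens')`` returns
``('(c) ', '(#) ')``.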
1463 """
1464 if sequence == '#':
1465 enumerator = '#'
1466 elif sequence == 'arabic':
1467 enumerator = str(ordinal)
1468 else:
1469 if sequence.endswith('alpha'):
1470 if ordinal > 26:
1471 return None
1472 enumerator = chr(ordinal + ord('a') - 1)
1473 elif sequence.endswith('roman'):
1474 try:
1475 enumerator = RomanNumeral(ordinal).to_uppercase()
1476 except TypeError:
1477 return None
1478 else: # shouldn't happen
1479 raise ParserError('unknown enumerator sequence: "%s"'
1480 % sequence)
1481 if sequence.startswith('lower'):
1482 enumerator = enumerator.lower()
1483 elif sequence.startswith('upper'):
1484 enumerator = enumerator.upper()
1485 else: # shouldn't happen
1486 raise ParserError('unknown enumerator sequence: "%s"'
1487 % sequence)
1488 formatinfo = self.enum.formatinfo[format]
1489 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1490 + ' ')
1491 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1492 return next_enumerator, auto_enumerator
1493
1494 def field_marker(self, match, context, next_state):
1495 """Field list item."""
1496 field_list = nodes.field_list()
1497 self.parent += field_list
1498 field, blank_finish = self.field(match)
1499 field_list += field
1500 offset = self.state_machine.line_offset + 1 # next line
1501 newline_offset, blank_finish = self.nested_list_parse(
1502 self.state_machine.input_lines[offset:],
1503 input_offset=self.state_machine.abs_line_offset() + 1,
1504 node=field_list, initial_state='FieldList',
1505 blank_finish=blank_finish)
1506 self.goto_line(newline_offset)
1507 if not blank_finish:
1508 self.parent += self.unindent_warning('Field list')
1509 return [], next_state, []
1510
1511 def field(self, match):
1512 name = self.parse_field_marker(match)
1513 src, srcline = self.state_machine.get_source_and_line()
1514 lineno = self.state_machine.abs_line_number()
1515 (indented, indent, line_offset, blank_finish
1516 ) = self.state_machine.get_first_known_indented(match.end())
1517 field_node = nodes.field()
1518 field_node.source = src
1519 field_node.line = srcline
1520 name_nodes, name_messages = self.inline_text(name, lineno)
1521 field_node += nodes.field_name(name, '', *name_nodes)
1522 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1523 field_node += field_body
1524 if indented:
1525 self.parse_field_body(indented, line_offset, field_body)
1526 return field_node, blank_finish
1527
1528 def parse_field_marker(self, match):
1529 """Extract & return field name from a field marker match."""
1530 field = match.group()[1:] # strip off leading ':'
1531 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1532 return field
1533
1534 def parse_field_body(self, indented, offset, node) -> None:
1535 self.nested_parse(indented, input_offset=offset, node=node)
1536
1537 def option_marker(self, match, context, next_state):
1538 """Option list item."""
1539 optionlist = nodes.option_list()
1540 (optionlist.source, optionlist.line
1541 ) = self.state_machine.get_source_and_line()
1542 try:
1543 listitem, blank_finish = self.option_list_item(match)
1544 except MarkupError as error:
1545 # This shouldn't happen; pattern won't match.
1546 msg = self.reporter.error('Invalid option list marker: %s'
1547 % error)
1548 self.parent += msg
1549 (indented, indent, line_offset, blank_finish
1550 ) = self.state_machine.get_first_known_indented(match.end())
1551 elements = self.block_quote(indented, line_offset)
1552 self.parent += elements
1553 if not blank_finish:
1554 self.parent += self.unindent_warning('Option list')
1555 return [], next_state, []
1556 self.parent += optionlist
1557 optionlist += listitem
1558 offset = self.state_machine.line_offset + 1 # next line
1559 newline_offset, blank_finish = self.nested_list_parse(
1560 self.state_machine.input_lines[offset:],
1561 input_offset=self.state_machine.abs_line_offset() + 1,
1562 node=optionlist, initial_state='OptionList',
1563 blank_finish=blank_finish)
1564 self.goto_line(newline_offset)
1565 if not blank_finish:
1566 self.parent += self.unindent_warning('Option list')
1567 return [], next_state, []
1568
1569 def option_list_item(self, match):
1570 offset = self.state_machine.abs_line_offset()
1571 options = self.parse_option_marker(match)
1572 (indented, indent, line_offset, blank_finish
1573 ) = self.state_machine.get_first_known_indented(match.end())
1574 if not indented: # not an option list item
1575 self.goto_line(offset)
1576 raise statemachine.TransitionCorrection('text')
1577 option_group = nodes.option_group('', *options)
1578 description = nodes.description('\n'.join(indented))
1579 option_list_item = nodes.option_list_item('', option_group,
1580 description)
1581 if indented:
1582 self.nested_parse(indented, input_offset=line_offset,
1583 node=description)
1584 return option_list_item, blank_finish
1585
1586 def parse_option_marker(self, match):
1587 """
1588 Return a list of `nodes.option` and `nodes.option_argument` objects,
1589 parsed from an option marker match.
1590
1591 :Exception: `MarkupError` for invalid option markers.
1592 """
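# Hedged example (the option text is hypothetical): the marker
# "-f FILE, --file=FILE" is split on ", " into two option strings; the
# first yields option_string "-f" with option_argument "FILE"
# (delimiter " "), the second yields option_string "--file" with
# option_argument "FILE" (delimiter "=").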
1593 optlist = []
1594 # split at ", ", except inside < > (complex arguments)
1595 optionstrings = re.split(r', (?![^<]*>)', match.group().rstrip())
1596 for optionstring in optionstrings:
1597 tokens = optionstring.split()
1598 delimiter = ' '
1599 firstopt = tokens[0].split('=', 1)
1600 if len(firstopt) > 1:
1601 # "--opt=value" form
1602 tokens[:1] = firstopt
1603 delimiter = '='
1604 elif (len(tokens[0]) > 2
1605 and ((tokens[0].startswith('-')
1606 and not tokens[0].startswith('--'))
1607 or tokens[0].startswith('+'))):
1608 # "-ovalue" form
1609 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1610 delimiter = ''
1611 if len(tokens) > 1 and (tokens[1].startswith('<')
1612 and tokens[-1].endswith('>')):
1613 # "-o <value1 value2>" form; join all values into one token
1614 tokens[1:] = [' '.join(tokens[1:])]
1615 if 0 < len(tokens) <= 2:
1616 option = nodes.option(optionstring)
1617 option += nodes.option_string(tokens[0], tokens[0])
1618 if len(tokens) > 1:
1619 option += nodes.option_argument(tokens[1], tokens[1],
1620 delimiter=delimiter)
1621 optlist.append(option)
1622 else:
1623 raise MarkupError(
1624 'wrong number of option tokens (=%s), should be 1 or 2: '
1625 '"%s"' % (len(tokens), optionstring))
1626 return optlist
1627
1628 def doctest(self, match, context, next_state):
1629 line = self.document.current_line
1630 data = '\n'.join(self.state_machine.get_text_block())
1631 # TODO: Parse with `directives.body.CodeBlock` with
1632 # argument 'pycon' (Python Console) in Docutils 1.0.
1633 n = nodes.doctest_block(data, data)
1634 n.line = line
1635 self.parent += n
1636 return [], next_state, []
1637
1638 def line_block(self, match, context, next_state):
1639 """First line of a line block."""
1640 block = nodes.line_block()
1641 self.parent += block
1642 lineno = self.state_machine.abs_line_number()
1643 (block.source,
1644 block.line) = self.state_machine.get_source_and_line(lineno)
1645 line, messages, blank_finish = self.line_block_line(match, lineno)
1646 block += line
1647 self.parent += messages
1648 if not blank_finish:
1649 offset = self.state_machine.line_offset + 1 # next line
1650 new_line_offset, blank_finish = self.nested_list_parse(
1651 self.state_machine.input_lines[offset:],
1652 input_offset=self.state_machine.abs_line_offset() + 1,
1653 node=block, initial_state='LineBlock',
1654 blank_finish=False)
1655 self.goto_line(new_line_offset)
1656 if not blank_finish:
1657 self.parent += self.reporter.warning(
1658 'Line block ends without a blank line.',
1659 line=lineno+1)
1660 if len(block):
1661 if block[0].indent is None:
1662 block[0].indent = 0
1663 self.nest_line_block_lines(block)
1664 return [], next_state, []
1665
1666 def line_block_line(self, match, lineno):
1667 """Return one line element of a line_block."""
1668 (indented, indent, line_offset, blank_finish
1669 ) = self.state_machine.get_first_known_indented(match.end(),
1670 until_blank=True)
1671 text = '\n'.join(indented)
1672 text_nodes, messages = self.inline_text(text, lineno)
1673 line = nodes.line(text, '', *text_nodes)
1674 (line.source,
1675 line.line) = self.state_machine.get_source_and_line(lineno)
1676 if match.string.rstrip() != '|': # not empty
1677 line.indent = len(match.group(1)) - 1
1678 return line, messages, blank_finish
1679
1680 def nest_line_block_lines(self, block) -> None:
1681 for index in range(1, len(block)):
1682 if block[index].indent is None:
1683 block[index].indent = block[index - 1].indent
1684 self.nest_line_block_segment(block)
1685
1686 def nest_line_block_segment(self, block) -> None:
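# Walk-through (illustrative): for child indents [0, 2, 2, 0], the two
# indent-2 lines are collected into a nested line_block, recursively
# processed, and inserted between the indent-0 lines, giving
# line, line_block(line, line), line.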
1687 indents = [item.indent for item in block]
1688 least = min(indents)
1689 new_items = []
1690 new_block = nodes.line_block()
1691 for item in block:
1692 if item.indent > least:
1693 new_block.append(item)
1694 else:
1695 if len(new_block):
1696 self.nest_line_block_segment(new_block)
1697 new_items.append(new_block)
1698 new_block = nodes.line_block()
1699 new_items.append(item)
1700 if len(new_block):
1701 self.nest_line_block_segment(new_block)
1702 new_items.append(new_block)
1703 block[:] = new_items
1704
1705 def grid_table_top(self, match, context, next_state):
1706 """Top border of a full table."""
1707 return self.table_top(match, context, next_state,
1708 self.isolate_grid_table,
1709 tableparser.GridTableParser)
1710
1711 def simple_table_top(self, match, context, next_state):
1712 """Top border of a simple table."""
1713 return self.table_top(match, context, next_state,
1714 self.isolate_simple_table,
1715 tableparser.SimpleTableParser)
1716
1717 def table_top(self, match, context, next_state,
1718 isolate_function, parser_class):
1719 """Top border of a generic table."""
1720 nodelist, blank_finish = self.table(isolate_function, parser_class)
1721 self.parent += nodelist
1722 if not blank_finish:
1723 msg = self.reporter.warning(
1724 'Blank line required after table.',
1725 line=self.state_machine.abs_line_number()+1)
1726 self.parent += msg
1727 return [], next_state, []
1728
1729 def table(self, isolate_function, parser_class):
1730 """Parse a table."""
1731 block, messages, blank_finish = isolate_function()
1732 if block:
1733 try:
1734 parser = parser_class()
1735 tabledata = parser.parse(block)
1736 tableline = (self.state_machine.abs_line_number() - len(block)
1737 + 1)
1738 table = self.build_table(tabledata, tableline)
1739 nodelist = [table] + messages
1740 except tableparser.TableMarkupError as err:
1741 nodelist = self.malformed_table(block, ' '.join(err.args),
1742 offset=err.offset) + messages
1743 else:
1744 nodelist = messages
1745 return nodelist, blank_finish
1746
1747 def isolate_grid_table(self):
1748 messages = []
1749 blank_finish = True
1750 try:
1751 block = self.state_machine.get_text_block(flush_left=True)
1752 except statemachine.UnexpectedIndentationError as err:
1753 block, src, srcline = err.args
1754 messages.append(self.reporter.error('Unexpected indentation.',
1755 source=src, line=srcline))
1756 blank_finish = False
1757 block.disconnect()
1758 # for East Asian chars:
1759 block.pad_double_width(self.double_width_pad_char)
1760 width = len(block[0].strip())
1761 for i in range(len(block)):
1762 block[i] = block[i].strip()
1763 if block[i][0] not in '+|': # check left edge
1764 blank_finish = False
1765 self.state_machine.previous_line(len(block) - i)
1766 del block[i:]
1767 break
1768 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1769 # from second-last to third line of table:
1770 for i in range(len(block) - 2, 1, -1):
1771 if self.grid_table_top_pat.match(block[i]):
1772 self.state_machine.previous_line(len(block) - i + 1)
1773 del block[i+1:]
1774 blank_finish = False
1775 break
1776 else:
1777 detail = 'Bottom border missing or corrupt.'
1778 messages.extend(self.malformed_table(block, detail, i))
1779 return [], messages, blank_finish
1780 for i in range(len(block)): # check right edge
1781 if len(block[i]) != width or block[i][-1] not in '+|':
1782 detail = 'Right border not aligned or missing.'
1783 messages.extend(self.malformed_table(block, detail, i))
1784 return [], messages, blank_finish
1785 return block, messages, blank_finish
1786
1787 def isolate_simple_table(self):
1788 start = self.state_machine.line_offset
1789 lines = self.state_machine.input_lines
1790 limit = len(lines) - 1
1791 toplen = len(lines[start].strip())
1792 pattern_match = self.simple_table_border_pat.match
1793 found = 0
1794 found_at = None
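# Scan forward for border lines: the second border found, or a border
# followed by a blank line or the end of input, marks the table bottom.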
1795 i = start + 1
1796 while i <= limit:
1797 line = lines[i]
1798 match = pattern_match(line)
1799 if match:
1800 if len(line.strip()) != toplen:
1801 self.state_machine.next_line(i - start)
1802 messages = self.malformed_table(
1803 lines[start:i+1], 'Bottom border or header rule does '
1804 'not match top border.', i-start)
1805 return [], messages, i == limit or not lines[i+1].strip()
1806 found += 1
1807 found_at = i
1808 if found == 2 or i == limit or not lines[i+1].strip():
1809 end = i
1810 break
1811 i += 1
1812 else: # reached end of input_lines
1813 details = 'No bottom table border found'
1814 if found:
1815 details += ' or no blank line after table bottom'
1816 self.state_machine.next_line(found_at - start)
1817 block = lines[start:found_at+1]
1818 else:
1819 self.state_machine.next_line(i - start - 1)
1820 block = lines[start:]
1821 messages = self.malformed_table(block, details + '.')
1822 return [], messages, not found
1823 self.state_machine.next_line(end - start)
1824 block = lines[start:end+1]
1825 # for East Asian chars:
1826 block.pad_double_width(self.double_width_pad_char)
1827 return block, [], end == limit or not lines[end+1].strip()
1828
1829 def malformed_table(self, block, detail='', offset=0):
1830 block.replace(self.double_width_pad_char, '')
1831 data = '\n'.join(block)
1832 message = 'Malformed table.'
1833 startline = self.state_machine.abs_line_number() - len(block) + 1
1834 if detail:
1835 message += '\n' + detail
1836 error = self.reporter.error(message, nodes.literal_block(data, data),
1837 line=startline+offset)
1838 return [error]
1839
1840 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
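# tabledata is the 3-tuple produced by the table parser (see self.table()
# above): (column widths, header rows, body rows), where each row is a
# list of cells and each cell is either None (spanned away) or a
# (morerows, morecols, offset, cellblock) tuple as consumed by
# build_table_row() below.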
1841 colwidths, headrows, bodyrows = tabledata
1842 table = nodes.table()
1843 if widths == 'auto':
1844 table['classes'] += ['colwidths-auto']
1845 elif widths: # "grid" or list of integers
1846 table['classes'] += ['colwidths-given']
1847 tgroup = nodes.tgroup(cols=len(colwidths))
1848 table += tgroup
1849 for colwidth in colwidths:
1850 colspec = nodes.colspec(colwidth=colwidth)
1851 if stub_columns:
1852 colspec.attributes['stub'] = True
1853 stub_columns -= 1
1854 tgroup += colspec
1855 if headrows:
1856 thead = nodes.thead()
1857 tgroup += thead
1858 for row in headrows:
1859 thead += self.build_table_row(row, tableline)
1860 tbody = nodes.tbody()
1861 tgroup += tbody
1862 for row in bodyrows:
1863 tbody += self.build_table_row(row, tableline)
1864 return table
1865
1866 def build_table_row(self, rowdata, tableline):
1867 row = nodes.row()
1868 for cell in rowdata:
1869 if cell is None:
1870 continue
1871 morerows, morecols, offset, cellblock = cell
1872 attributes = {}
1873 if morerows:
1874 attributes['morerows'] = morerows
1875 if morecols:
1876 attributes['morecols'] = morecols
1877 entry = nodes.entry(**attributes)
1878 row += entry
1879 if ''.join(cellblock):
1880 self.nested_parse(cellblock, input_offset=tableline+offset,
1881 node=entry)
1882 return row
1883
1884 explicit = Struct()
1885 """Patterns and constants used for explicit markup recognition."""
1886
1887 explicit.patterns = Struct(
1888 target=re.compile(r"""
1889 (
1890 _ # anonymous target
1891 | # *OR*
1892 (?!_) # no underscore at the beginning
1893 (?P<quote>`?) # optional open quote
1894 (?![ `]) # first char. not space or
1895 # backquote
1896 (?P<name> # reference name
1897 .+?
1898 )
1899 %(non_whitespace_escape_before)s
1900 (?P=quote) # close quote if open quote used
1901 )
1902 (?<!(?<!\x00):) # no unescaped colon at end
1903 %(non_whitespace_escape_before)s
1904 [ ]? # optional space
1905 : # end of reference name
1906 ([ ]+|$) # followed by whitespace
1907 """ % vars(Inliner), re.VERBOSE),
1908 reference=re.compile(r"""
1909 (
1910 (?P<simple>%(simplename)s)_
1911 | # *OR*
1912 ` # open backquote
1913 (?![ ]) # not space
1914 (?P<phrase>.+?) # hyperlink phrase
1915 %(non_whitespace_escape_before)s
1916 `_ # close backquote,
1917 # reference mark
1918 )
1919 $ # end of string
1920 """ % vars(Inliner), re.VERBOSE),
1921 substitution=re.compile(r"""
1922 (
1923 (?![ ]) # first char. not space
1924 (?P<name>.+?) # substitution text
1925 %(non_whitespace_escape_before)s
1926 \| # close delimiter
1927 )
1928 ([ ]+|$) # followed by whitespace
1929 """ % vars(Inliner),
1930 re.VERBOSE),)
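# For illustration: in a substitution definition such as
# ".. |logo| image:: logo.png", the substitution pattern above is applied
# to the text after the opening "|" and matches "logo| ", capturing
# "logo" as the substitution name (see substitution_def() below).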
1931
1932 def footnote(self, match):
1933 src, srcline = self.state_machine.get_source_and_line()
1934 (indented, indent, offset, blank_finish
1935 ) = self.state_machine.get_first_known_indented(match.end())
1936 label = match.group(1)
1937 name = normalize_name(label)
1938 footnote = nodes.footnote('\n'.join(indented))
1939 footnote.source = src
1940 footnote.line = srcline
1941 if name[0] == '#': # auto-numbered
1942 name = name[1:] # autonumber label
1943 footnote['auto'] = 1
1944 if name:
1945 footnote['names'].append(name)
1946 self.document.note_autofootnote(footnote)
1947 elif name == '*': # auto-symbol
1948 name = ''
1949 footnote['auto'] = '*'
1950 self.document.note_symbol_footnote(footnote)
1951 else: # manually numbered
1952 footnote += nodes.label('', label)
1953 footnote['names'].append(name)
1954 self.document.note_footnote(footnote)
1955 if name:
1956 self.document.note_explicit_target(footnote, footnote)
1957 else:
1958 self.document.set_id(footnote, footnote)
1959 if indented:
1960 self.nested_parse(indented, input_offset=offset, node=footnote)
1961 else:
1962 footnote += self.reporter.warning('Footnote content expected.')
1963 return [footnote], blank_finish
1964
1965 def citation(self, match):
1966 src, srcline = self.state_machine.get_source_and_line()
1967 (indented, indent, offset, blank_finish
1968 ) = self.state_machine.get_first_known_indented(match.end())
1969 label = match.group(1)
1970 name = normalize_name(label)
1971 citation = nodes.citation('\n'.join(indented))
1972 citation.source = src
1973 citation.line = srcline
1974 citation += nodes.label('', label)
1975 citation['names'].append(name)
1976 self.document.note_citation(citation)
1977 self.document.note_explicit_target(citation, citation)
1978 if indented:
1979 self.nested_parse(indented, input_offset=offset, node=citation)
1980 else:
1981 citation += self.reporter.warning('Citation content expected.')
1982 return [citation], blank_finish
1983
1984 def hyperlink_target(self, match):
1985 pattern = self.explicit.patterns.target
1986 lineno = self.state_machine.abs_line_number()
1987 (block, indent, offset, blank_finish
1988 ) = self.state_machine.get_first_known_indented(
1989 match.end(), until_blank=True, strip_indent=False)
1990 blocktext = match.string[:match.end()] + '\n'.join(block)
1991 block = [escape2null(line) for line in block]
1992 escaped = block[0]
1993 blockindex = 0
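# A target may wrap over several lines; keep appending escaped block lines
# until the accumulated text matches the target pattern, or report a
# malformed hyperlink target if the block runs out first.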
1994 while True:
1995 targetmatch = pattern.match(escaped)
1996 if targetmatch:
1997 break
1998 blockindex += 1
1999 try:
2000 escaped += block[blockindex]
2001 except IndexError:
2002 raise MarkupError('malformed hyperlink target.')
2003 del block[:blockindex]
2004 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
2005 target = self.make_target(block, blocktext, lineno,
2006 targetmatch.group('name'))
2007 return [target], blank_finish
2008
2009 def make_target(self, block, block_text, lineno, target_name):
2010 target_type, data = self.parse_target(block, block_text, lineno)
2011 if target_type == 'refname':
2012 target = nodes.target(block_text, '', refname=normalize_name(data))
2013 target.indirect_reference_name = data
2014 self.add_target(target_name, '', target, lineno)
2015 self.document.note_indirect_target(target)
2016 return target
2017 elif target_type == 'refuri':
2018 target = nodes.target(block_text, '')
2019 self.add_target(target_name, data, target, lineno)
2020 return target
2021 else:
2022 return data
2023
2024 def parse_target(self, block, block_text, lineno):
2025 """
2026 Determine the type of reference of a target.
2027
2028 :Return: A 2-tuple, one of:
2029
2030 - 'refname' and the indirect reference name
2031 - 'refuri' and the URI
2032 - 'malformed' and a system_message node
2033 """
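# Illustrative examples (hypothetical targets): a block ending in
# "other-target_" is an indirect target and yields
# ('refname', 'other-target'); a block such as "https://example.org"
# yields ('refuri', 'https://example.org') with escaped whitespace
# removed.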
2034 if block and block[-1].strip()[-1:] == '_': # possible indirect target
2035 reference = ' '.join(line.strip() for line in block)
2036 refname = self.is_reference(reference)
2037 if refname:
2038 return 'refname', refname
2039 ref_parts = split_escaped_whitespace(' '.join(block))
2040 reference = ' '.join(''.join(unescape(part).split())
2041 for part in ref_parts)
2042 return 'refuri', reference
2043
2044 def is_reference(self, reference):
2045 match = self.explicit.patterns.reference.match(
2046 whitespace_normalize_name(reference))
2047 if not match:
2048 return None
2049 return unescape(match.group('simple') or match.group('phrase'))
2050
2051 def add_target(self, targetname, refuri, target, lineno):
2052 target.line = lineno
2053 if targetname:
2054 name = normalize_name(unescape(targetname))
2055 target['names'].append(name)
2056 if refuri:
2057 uri = self.inliner.adjust_uri(refuri)
2058 if uri:
2059 target['refuri'] = uri
2060 else:
2061 raise ApplicationError('problem with URI: %r' % refuri)
2062 self.document.note_explicit_target(target, self.parent)
2063 else: # anonymous target
2064 if refuri:
2065 target['refuri'] = refuri
2066 target['anonymous'] = True
2067 self.document.note_anonymous_target(target)
2068
2069 def substitution_def(self, match):
2070 pattern = self.explicit.patterns.substitution
2071 src, srcline = self.state_machine.get_source_and_line()
2072 (block, indent, offset, blank_finish
2073 ) = self.state_machine.get_first_known_indented(match.end(),
2074 strip_indent=False)
2075 blocktext = (match.string[:match.end()] + '\n'.join(block))
2076 block.disconnect()
2077 escaped = escape2null(block[0].rstrip())
2078 blockindex = 0
2079 while True:
2080 subdefmatch = pattern.match(escaped)
2081 if subdefmatch:
2082 break
2083 blockindex += 1
2084 try:
2085 escaped = escaped + ' ' + escape2null(
2086 block[blockindex].strip())
2087 except IndexError:
2088 raise MarkupError('malformed substitution definition.')
2089 del block[:blockindex] # strip out the substitution marker
2090 start = subdefmatch.end()-len(escaped)-1
2091 block[0] = (block[0].strip() + ' ')[start:-1]
2092 if not block[0]:
2093 del block[0]
2094 offset += 1
2095 while block and not block[-1].strip():
2096 block.pop()
2097 subname = subdefmatch.group('name')
2098 substitution_node = nodes.substitution_definition(blocktext)
2099 substitution_node.source = src
2100 substitution_node.line = srcline
2101 if not block:
2102 msg = self.reporter.warning(
2103 'Substitution definition "%s" missing contents.' % subname,
2104 nodes.literal_block(blocktext, blocktext),
2105 source=src, line=srcline)
2106 return [msg], blank_finish
2107 block[0] = block[0].strip()
2108 substitution_node['names'].append(
2109 nodes.whitespace_normalize_name(subname))
2110 new_abs_offset, blank_finish = self.nested_list_parse(
2111 block, input_offset=offset, node=substitution_node,
2112 initial_state='SubstitutionDef', blank_finish=blank_finish)
2113 i = 0
2114 for node in substitution_node[:]:
2115 if not (isinstance(node, nodes.Inline)
2116 or isinstance(node, nodes.Text)):
2117 self.parent += substitution_node[i]
2118 del substitution_node[i]
2119 else:
2120 i += 1
2121 for node in substitution_node.findall(nodes.Element):
2122 if self.disallowed_inside_substitution_definitions(node):
2123 pformat = nodes.literal_block('', node.pformat().rstrip())
2124 msg = self.reporter.error(
2125 'Substitution definition contains illegal element <%s>:'
2126 % node.tagname,
2127 pformat, nodes.literal_block(blocktext, blocktext),
2128 source=src, line=srcline)
2129 return [msg], blank_finish
2130 if len(substitution_node) == 0:
2131 msg = self.reporter.warning(
2132 'Substitution definition "%s" empty or invalid.' % subname,
2133 nodes.literal_block(blocktext, blocktext),
2134 source=src, line=srcline)
2135 return [msg], blank_finish
2136 self.document.note_substitution_def(
2137 substitution_node, subname, self.parent)
2138 return [substitution_node], blank_finish
2139
2140 def disallowed_inside_substitution_definitions(self, node) -> bool:
2141 if (node['ids']
2142 or isinstance(node, nodes.reference) and node.get('anonymous')
2143 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2144 return True
2145 else:
2146 return False
2147
2148 def directive(self, match, **option_presets):
2149 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2150 type_name = match.group(1)
2151 directive_class, messages = directives.directive(
2152 type_name, self.memo.language, self.document)
2153 self.parent += messages
2154 if directive_class:
2155 return self.run_directive(
2156 directive_class, match, type_name, option_presets)
2157 else:
2158 return self.unknown_directive(type_name)
2159
2160 def run_directive(self, directive, match, type_name, option_presets):
2161 """
2162 Parse a directive then run its directive function.
2163
2164 Parameters:
2165
2166 - `directive`: The class implementing the directive. Must be
2167 a subclass of `rst.Directive`.
2168
2169 - `match`: A regular expression match object which matched the first
2170 line of the directive.
2171
2172 - `type_name`: The directive name, as used in the source text.
2173
2174 - `option_presets`: A dictionary of preset options, defaults for the
2175 directive options. Currently, only an "alt" option is passed by
2176 substitution definitions (value: the substitution name), which may
2177 be used by an embedded image directive.
2178
2179 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2180 """
2181 if isinstance(directive, (FunctionType, MethodType)):
2182 from docutils.parsers.rst import convert_directive_function
2183 directive = convert_directive_function(directive)
2184 lineno = self.state_machine.abs_line_number()
2185 initial_line_offset = self.state_machine.line_offset
2186 (indented, indent, line_offset, blank_finish
2187 ) = self.state_machine.get_first_known_indented(match.end(),
2188 strip_top=0)
2189 block_text = '\n'.join(self.state_machine.input_lines[
2190 initial_line_offset : self.state_machine.line_offset + 1]) # noqa: E203,E501
2191 try:
2192 arguments, options, content, content_offset = (
2193 self.parse_directive_block(indented, line_offset,
2194 directive, option_presets))
2195 except MarkupError as detail:
2196 error = self.reporter.error(
2197 'Error in "%s" directive:\n%s.' % (type_name,
2198 ' '.join(detail.args)),
2199 nodes.literal_block(block_text, block_text), line=lineno)
2200 return [error], blank_finish
2201 directive_instance = directive(
2202 type_name, arguments, options, content, lineno,
2203 content_offset, block_text, self, self.state_machine)
2204 try:
2205 result = directive_instance.run()
2206 except docutils.parsers.rst.DirectiveError as error:
2207 msg_node = self.reporter.system_message(error.level, error.msg,
2208 line=lineno)
2209 msg_node += nodes.literal_block(block_text, block_text)
2210 result = [msg_node]
2211 assert isinstance(result, list), \
2212 'Directive "%s" must return a list of nodes.' % type_name
2213 for i in range(len(result)):
2214 assert isinstance(result[i], nodes.Node), \
2215 ('Directive "%s" returned non-Node object (index %s): %r'
2216 % (type_name, i, result[i]))
2217 return (result,
2218 blank_finish or self.state_machine.is_next_line_blank())
2219
2220 def parse_directive_block(self, indented, line_offset, directive,
2221 option_presets):
2222 option_spec = directive.option_spec
2223 has_content = directive.has_content
2224 if indented and not indented[0].strip():
2225 indented.trim_start()
2226 line_offset += 1
2227 while indented and not indented[-1].strip():
2228 indented.trim_end()
2229 if indented and (directive.required_arguments
2230 or directive.optional_arguments
2231 or option_spec):
2232 for i, line in enumerate(indented):
2233 if not line.strip():
2234 break
2235 else:
2236 i += 1
2237 arg_block = indented[:i]
2238 content = indented[i+1:]
2239 content_offset = line_offset + i + 1
2240 else:
2241 content = indented
2242 content_offset = line_offset
2243 arg_block = []
2244 if option_spec:
2245 options, arg_block = self.parse_directive_options(
2246 option_presets, option_spec, arg_block)
2247 else:
2248 options = {}
2249 if arg_block and not (directive.required_arguments
2250 or directive.optional_arguments):
2251 content = arg_block + indented[i:]
2252 content_offset = line_offset
2253 arg_block = []
2254 while content and not content[0].strip():
2255 content.trim_start()
2256 content_offset += 1
2257 if directive.required_arguments or directive.optional_arguments:
2258 arguments = self.parse_directive_arguments(
2259 directive, arg_block)
2260 else:
2261 arguments = []
2262 if content and not has_content:
2263 raise MarkupError('no content permitted')
2264 return arguments, options, content, content_offset
2265
2266 def parse_directive_options(self, option_presets, option_spec, arg_block):
2267 options = option_presets.copy()
2268 for i, line in enumerate(arg_block):
2269 if re.match(Body.patterns['field_marker'], line):
2270 opt_block = arg_block[i:]
2271 arg_block = arg_block[:i]
2272 break
2273 else:
2274 opt_block = []
2275 if opt_block:
2276 success, data = self.parse_extension_options(option_spec,
2277 opt_block)
2278 if success: # data is a dict of options
2279 options.update(data)
2280 else: # data is an error string
2281 raise MarkupError(data)
2282 return options, arg_block
2283
2284 def parse_directive_arguments(self, directive, arg_block):
2285 required = directive.required_arguments
2286 optional = directive.optional_arguments
2287 arg_text = '\n'.join(arg_block)
2288 arguments = arg_text.split()
2289 if len(arguments) < required:
2290 raise MarkupError('%s argument(s) required, %s supplied'
2291 % (required, len(arguments)))
2292 elif len(arguments) > required + optional:
2293 if directive.final_argument_whitespace:
2294 arguments = arg_text.split(None, required + optional - 1)
2295 else:
2296 raise MarkupError(
2297 'maximum %s argument(s) allowed, %s supplied'
2298 % (required + optional, len(arguments)))
2299 return arguments
2300
2301 def parse_extension_options(self, option_spec, datalines):
2302 """
2303 Parse `datalines` for a field list containing extension options
2304 matching `option_spec`.
2305
2306 :Parameters:
2307 - `option_spec`: a mapping of option name to conversion
2308 function, which should raise an exception on bad input.
2309 - `datalines`: a list of input strings.
2310
2311 :Return:
2312 - Success value, 1 or 0.
2313 - An option dictionary on success, an error string on failure.
2314 """
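# Sketch with a hypothetical option_spec: given
# option_spec = {'alt': directives.unchanged} and
# datalines = [':alt: replacement text'], a successful parse returns
# (1, {'alt': 'replacement text'}); an unknown field name would instead
# return (0, 'unknown option: ...').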
2315 node = nodes.field_list()
2316 newline_offset, blank_finish = self.nested_list_parse(
2317 datalines, 0, node, initial_state='ExtensionOptions',
2318 blank_finish=True)
2319 if newline_offset != len(datalines): # incomplete parse of block
2320 return 0, 'invalid option block'
2321 try:
2322 options = utils.extract_extension_options(node, option_spec)
2323 except KeyError as detail:
2324 return 0, 'unknown option: "%s"' % detail.args[0]
2325 except (ValueError, TypeError) as detail:
2326 return 0, 'invalid option value: %s' % ' '.join(detail.args)
2327 except utils.ExtensionOptionError as detail:
2328 return 0, 'invalid option data: %s' % ' '.join(detail.args)
2329 if blank_finish:
2330 return 1, options
2331 else:
2332 return 0, 'option data incompletely parsed'
2333
2334 def unknown_directive(self, type_name):
2335 lineno = self.state_machine.abs_line_number()
2336 (indented, indent, offset, blank_finish
2337 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2338 text = '\n'.join(indented)
2339 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2340 nodes.literal_block(text, text),
2341 line=lineno)
2342 return [error], blank_finish
2343
2344 def comment(self, match):
2345 if self.state_machine.is_next_line_blank():
2346 first_comment_line = match.string[match.end():]
2347 if not first_comment_line.strip(): # empty comment
2348 return [nodes.comment()], True # "A tiny but practical wart."
2349 if first_comment_line.startswith('end of inclusion from "'):
2350 # cf. parsers.rst.directives.misc.Include
2351 self.document.include_log.pop()
2352 return [], True
2353 (indented, indent, offset, blank_finish
2354 ) = self.state_machine.get_first_known_indented(match.end())
2355 while indented and not indented[-1].strip():
2356 indented.trim_end()
2357 text = '\n'.join(indented)
2358 return [nodes.comment(text, text)], blank_finish
2359
2360 explicit.constructs = [
2361 (footnote,
2362 re.compile(r"""
2363 \.\.[ ]+ # explicit markup start
2364 \[
2365 ( # footnote label:
2366 [0-9]+ # manually numbered footnote
2367 | # *OR*
2368 \# # anonymous auto-numbered footnote
2369 | # *OR*
2370 \#%s # auto-numbered footnote with a label
2371 | # *OR*
2372 \* # auto-symbol footnote
2373 )
2374 \]
2375 ([ ]+|$) # whitespace or end of line
2376 """ % Inliner.simplename, re.VERBOSE)),
2377 (citation,
2378 re.compile(r"""
2379 \.\.[ ]+ # explicit markup start
2380 \[(%s)\] # citation label
2381 ([ ]+|$) # whitespace or end of line
2382 """ % Inliner.simplename, re.VERBOSE)),
2383 (hyperlink_target,
2384 re.compile(r"""
2385 \.\.[ ]+ # explicit markup start
2386 _ # target indicator
2387 (?![ ]|$) # first char. not space or EOL
2388 """, re.VERBOSE)),
2389 (substitution_def,
2390 re.compile(r"""
2391 \.\.[ ]+ # explicit markup start
2392 \| # substitution indicator
2393 (?![ ]|$) # first char. not space or EOL
2394 """, re.VERBOSE)),
2395 (directive,
2396 re.compile(r"""
2397 \.\.[ ]+ # explicit markup start
2398 (%s) # directive name
2399 [ ]? # optional space
2400 :: # directive delimiter
2401 ([ ]+|$) # whitespace or end of line
2402 """ % Inliner.simplename, re.VERBOSE))]
2403
2404 def explicit_markup(self, match, context, next_state):
2405 """Footnotes, hyperlink targets, directives, comments."""
2406 nodelist, blank_finish = self.explicit_construct(match)
2407 self.parent += nodelist
2408 self.explicit_list(blank_finish)
2409 return [], next_state, []
2410
2411 def explicit_construct(self, match):
2412 """Determine which explicit construct this is, parse & return it."""
2413 errors = []
2414 for method, pattern in self.explicit.constructs:
2415 expmatch = pattern.match(match.string)
2416 if expmatch:
2417 try:
2418 return method(self, expmatch)
2419 except MarkupError as error:
2420 lineno = self.state_machine.abs_line_number()
2421 message = ' '.join(error.args)
2422 errors.append(self.reporter.warning(message, line=lineno))
2423 break
2424 nodelist, blank_finish = self.comment(match)
2425 return nodelist + errors, blank_finish
2426
2427 def explicit_list(self, blank_finish) -> None:
2428 """
2429 Create a nested state machine for a series of explicit markup
2430 constructs (including anonymous hyperlink targets).
2431 """
2432 offset = self.state_machine.line_offset + 1 # next line
2433 newline_offset, blank_finish = self.nested_list_parse(
2434 self.state_machine.input_lines[offset:],
2435 input_offset=self.state_machine.abs_line_offset() + 1,
2436 node=self.parent, initial_state='Explicit',
2437 blank_finish=blank_finish,
2438 match_titles=self.state_machine.match_titles)
2439 self.goto_line(newline_offset)
2440 if not blank_finish:
2441 self.parent += self.unindent_warning('Explicit markup')
2442
2443 def anonymous(self, match, context, next_state):
2444 """Anonymous hyperlink targets."""
2445 nodelist, blank_finish = self.anonymous_target(match)
2446 self.parent += nodelist
2447 self.explicit_list(blank_finish)
2448 return [], next_state, []
2449
2450 def anonymous_target(self, match):
2451 lineno = self.state_machine.abs_line_number()
2452 (block, indent, offset, blank_finish
2453 ) = self.state_machine.get_first_known_indented(match.end(),
2454 until_blank=True)
2455 blocktext = match.string[:match.end()] + '\n'.join(block)
2456 block = [escape2null(line) for line in block]
2457 target = self.make_target(block, blocktext, lineno, '')
2458 return [target], blank_finish
2459
2460 def line(self, match, context, next_state):
2461 """Section title overline or transition marker."""
2462 if self.state_machine.match_titles:
2463 return [match.string], 'Line', []
2464 elif match.string.strip() == '::':
2465 raise statemachine.TransitionCorrection('text')
2466 elif len(match.string.strip()) < 4:
2467 msg = self.reporter.info(
2468 'Unexpected possible title overline or transition.\n'
2469 "Treating it as ordinary text because it's so short.",
2470 line=self.state_machine.abs_line_number())
2471 self.parent += msg
2472 raise statemachine.TransitionCorrection('text')
2473 else:
2474 blocktext = self.state_machine.line
2475 msg = self.reporter.error(
2476 'Unexpected section title or transition.',
2477 nodes.literal_block(blocktext, blocktext),
2478 line=self.state_machine.abs_line_number())
2479 self.parent += msg
2480 return [], next_state, []
2481
2482 def text(self, match, context, next_state):
2483 """Titles, definition lists, paragraphs."""
2484 return [match.string], 'Text', []
2485
2486
2487class RFC2822Body(Body):
2488
2489 """
2490 RFC2822 headers are only valid as the first constructs in documents. As
2491 soon as anything else appears, the `Body` state should take over.
2492 """
2493
2494 patterns = Body.patterns.copy() # can't modify the original
2495 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2496 initial_transitions = [(name, 'Body')
2497 for name in Body.initial_transitions]
2498 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
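# The 'rfc2822' pattern above matches header-style lines such as
# "Author: J. Doe" or "Date:" (a run of printable, non-space characters
# other than ':', followed by a colon and whitespace or end of line).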
2499
2500 def rfc2822(self, match, context, next_state):
2501 """RFC2822-style field list item."""
2502 fieldlist = nodes.field_list(classes=['rfc2822'])
2503 self.parent += fieldlist
2504 field, blank_finish = self.rfc2822_field(match)
2505 fieldlist += field
2506 offset = self.state_machine.line_offset + 1 # next line
2507 newline_offset, blank_finish = self.nested_list_parse(
2508 self.state_machine.input_lines[offset:],
2509 input_offset=self.state_machine.abs_line_offset() + 1,
2510 node=fieldlist, initial_state='RFC2822List',
2511 blank_finish=blank_finish)
2512 self.goto_line(newline_offset)
2513 if not blank_finish:
2514 self.parent += self.unindent_warning(
2515 'RFC2822-style field list')
2516 return [], next_state, []
2517
2518 def rfc2822_field(self, match):
2519 name = match.string[:match.string.find(':')]
2520 (indented, indent, line_offset, blank_finish
2521 ) = self.state_machine.get_first_known_indented(match.end(),
2522 until_blank=True)
2523 fieldnode = nodes.field()
2524 fieldnode += nodes.field_name(name, name)
2525 fieldbody = nodes.field_body('\n'.join(indented))
2526 fieldnode += fieldbody
2527 if indented:
2528 self.nested_parse(indented, input_offset=line_offset,
2529 node=fieldbody)
2530 return fieldnode, blank_finish
2531
2532
2533class SpecializedBody(Body):
2534
2535 """
2536 Superclass for second and subsequent compound element members. Compound
2537 elements are lists and list-like constructs.
2538
2539 All transition methods are disabled (redefined as `invalid_input`).
2540 Override individual methods in subclasses to re-enable.
2541
2542 For example, once an initial bullet list item, say, is recognized, the
2543 `BulletList` subclass takes over, with a "bullet_list" node as its
2544 container. Upon encountering the initial bullet list item, `Body.bullet`
2545 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2546 starts up a nested parsing session with `BulletList` as the initial state.
2547 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2548 as only bullet list items are encountered, they are parsed and inserted
2549 into the container. The first construct which is *not* a bullet list item
2550 triggers the `invalid_input` method, which ends the nested parse and
2551 closes the container. `BulletList` needs to recognize input that is
2552 invalid in the context of a bullet list, which means everything *other
2553 than* bullet list items, so it inherits the transition list created in
2554 `Body`.
2555 """
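# For instance (illustrative input): while nested-parsing
#   * item one
#   * item two
#   a paragraph
# BulletList consumes both items; the paragraph line triggers
# invalid_input(), which backs up one line and raises EOFError so the
# parent state machine can re-examine it.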
2556
2557 def invalid_input(self, match=None, context=None, next_state=None):
2558 """Not a compound element member. Abort this state machine."""
2559 self.state_machine.previous_line() # back up so parent SM can reassess
2560 raise EOFError
2561
2562 indent = invalid_input
2563 bullet = invalid_input
2564 enumerator = invalid_input
2565 field_marker = invalid_input
2566 option_marker = invalid_input
2567 doctest = invalid_input
2568 line_block = invalid_input
2569 grid_table_top = invalid_input
2570 simple_table_top = invalid_input
2571 explicit_markup = invalid_input
2572 anonymous = invalid_input
2573 line = invalid_input
2574 text = invalid_input
2575
2576
2577class BulletList(SpecializedBody):
2578
2579 """Second and subsequent bullet_list list_items."""
2580
2581 def bullet(self, match, context, next_state):
2582 """Bullet list item."""
2583 if match.string[0] != self.parent['bullet']:
2584 # different bullet: new list
2585 self.invalid_input()
2586 listitem, blank_finish = self.list_item(match.end())
2587 self.parent += listitem
2588 self.blank_finish = blank_finish
2589 return [], next_state, []
2590
2591
2592class DefinitionList(SpecializedBody):
2593
2594 """Second and subsequent definition_list_items."""
2595
2596 def text(self, match, context, next_state):
2597 """Definition lists."""
2598 return [match.string], 'Definition', []
2599
2600
2601class EnumeratedList(SpecializedBody):
2602
2603 """Second and subsequent enumerated_list list_items."""
2604
2605 def enumerator(self, match, context, next_state):
2606 """Enumerated list item."""
2607 format, sequence, text, ordinal = self.parse_enumerator(
2608 match, self.parent['enumtype'])
2609 if (format != self.format
2610 or (sequence != '#' and (sequence != self.parent['enumtype']
2611 or self.auto
2612 or ordinal != (self.lastordinal + 1)))
2613 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2614 # different enumeration: new list
2615 self.invalid_input()
2616 if sequence == '#':
2617 self.auto = 1
2618 listitem, blank_finish = self.list_item(match.end())
2619 self.parent += listitem
2620 self.blank_finish = blank_finish
2621 self.lastordinal = ordinal
2622 return [], next_state, []
2623
2624
2625class FieldList(SpecializedBody):
2626
2627 """Second and subsequent field_list fields."""
2628
2629 def field_marker(self, match, context, next_state):
2630 """Field list field."""
2631 field, blank_finish = self.field(match)
2632 self.parent += field
2633 self.blank_finish = blank_finish
2634 return [], next_state, []
2635
2636
2637class OptionList(SpecializedBody):
2638
2639 """Second and subsequent option_list option_list_items."""
2640
2641 def option_marker(self, match, context, next_state):
2642 """Option list item."""
2643 try:
2644 option_list_item, blank_finish = self.option_list_item(match)
2645 except MarkupError:
2646 self.invalid_input()
2647 self.parent += option_list_item
2648 self.blank_finish = blank_finish
2649 return [], next_state, []
2650
2651
2652class RFC2822List(SpecializedBody, RFC2822Body):
2653
2654 """Second and subsequent RFC2822-style field_list fields."""
2655
2656 patterns = RFC2822Body.patterns
2657 initial_transitions = RFC2822Body.initial_transitions
2658
2659 def rfc2822(self, match, context, next_state):
2660 """RFC2822-style field list item."""
2661 field, blank_finish = self.rfc2822_field(match)
2662 self.parent += field
2663 self.blank_finish = blank_finish
2664 return [], 'RFC2822List', []
2665
2666 blank = SpecializedBody.invalid_input
2667
2668
2669class ExtensionOptions(FieldList):
2670
2671 """
2672 Parse field_list fields for extension options.
2673
2674 No nested parsing is done (including inline markup parsing).
2675 """
2676
2677 def parse_field_body(self, indented, offset, node) -> None:
2678 """Override `Body.parse_field_body` for simpler parsing."""
2679 lines = []
2680 for line in list(indented) + ['']:
2681 if line.strip():
2682 lines.append(line)
2683 elif lines:
2684 text = '\n'.join(lines)
2685 node += nodes.paragraph(text, text)
2686 lines = []
2687
2688
2689class LineBlock(SpecializedBody):
2690
2691 """Second and subsequent lines of a line_block."""
2692
2693 blank = SpecializedBody.invalid_input
2694
2695 def line_block(self, match, context, next_state):
2696 """New line of line block."""
2697 lineno = self.state_machine.abs_line_number()
2698 line, messages, blank_finish = self.line_block_line(match, lineno)
2699 self.parent += line
2700 self.parent.parent += messages
2701 self.blank_finish = blank_finish
2702 return [], next_state, []
2703
2704
2705class Explicit(SpecializedBody):
2706
2707 """Second and subsequent explicit markup constructs."""
2708
2709 def explicit_markup(self, match, context, next_state):
2710 """Footnotes, hyperlink targets, directives, comments."""
2711 nodelist, blank_finish = self.explicit_construct(match)
2712 self.parent += nodelist
2713 self.blank_finish = blank_finish
2714 return [], next_state, []
2715
2716 def anonymous(self, match, context, next_state):
2717 """Anonymous hyperlink targets."""
2718 nodelist, blank_finish = self.anonymous_target(match)
2719 self.parent += nodelist
2720 self.blank_finish = blank_finish
2721 return [], next_state, []
2722
2723 blank = SpecializedBody.invalid_input
2724
2725
2726class SubstitutionDef(Body):
2727
2728 """
2729 Parser for the contents of a substitution_definition element.
2730 """
2731
2732 patterns = {
2733 'embedded_directive': re.compile(r'(%s)::( +|$)'
2734 % Inliner.simplename),
2735 'text': r''}
2736 initial_transitions = ['embedded_directive', 'text']
2737
2738 def embedded_directive(self, match, context, next_state):
2739 nodelist, blank_finish = self.directive(match,
2740 alt=self.parent['names'][0])
2741 self.parent += nodelist
2742 if not self.state_machine.at_eof():
2743 self.blank_finish = blank_finish
2744 raise EOFError
2745
2746 def text(self, match, context, next_state):
2747 if not self.state_machine.at_eof():
2748 self.blank_finish = self.state_machine.is_next_line_blank()
2749 raise EOFError
2750
2751
2752class Text(RSTState):
2753
2754 """
2755 Classifier of second line of a text block.
2756
2757 Could be a paragraph, a definition list item, or a title.
2758 """
2759
2760 patterns = {'underline': Body.patterns['line'],
2761 'text': r''}
2762 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2763
2764 def blank(self, match, context, next_state):
2765 """End of paragraph."""
2766 # NOTE: self.paragraph returns [node, system_message(s)], literalnext
2767 paragraph, literalnext = self.paragraph(
2768 context, self.state_machine.abs_line_number() - 1)
2769 self.parent += paragraph
2770 if literalnext:
2771 self.parent += self.literal_block()
2772 return [], 'Body', []
2773
2774 def eof(self, context):
2775 if context:
2776 self.blank(None, context, None)
2777 return []
2778
2779 def indent(self, match, context, next_state):
2780 """Definition list item."""
2781 dl = nodes.definition_list()
2782 # the definition list starts on the line before the indent:
2783 lineno = self.state_machine.abs_line_number() - 1
2784 dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
2785 dl_item, blank_finish = self.definition_list_item(context)
2786 dl += dl_item
2787 self.parent += dl
2788 offset = self.state_machine.line_offset + 1 # next line
2789 newline_offset, blank_finish = self.nested_list_parse(
2790 self.state_machine.input_lines[offset:],
2791 input_offset=self.state_machine.abs_line_offset() + 1,
2792 node=dl, initial_state='DefinitionList',
2793 blank_finish=blank_finish, blank_finish_state='Definition')
2794 self.goto_line(newline_offset)
2795 if not blank_finish:
2796 self.parent += self.unindent_warning('Definition list')
2797 return [], 'Body', []
2798
2799 def underline(self, match, context, next_state):
2800 """Section title."""
2801 lineno = self.state_machine.abs_line_number()
2802 title = context[0].rstrip()
2803 underline = match.string.rstrip()
2804 source = title + '\n' + underline
2805 messages = []
2806 if column_width(title) > len(underline):
2807 if len(underline) < 4:
2808 if self.state_machine.match_titles:
2809 msg = self.reporter.info(
2810 'Possible title underline, too short for the title.\n'
2811 "Treating it as ordinary text because it's so short.",
2812 line=lineno)
2813 self.parent += msg
2814 raise statemachine.TransitionCorrection('text')
2815 else:
2816 blocktext = context[0] + '\n' + self.state_machine.line
2817 msg = self.reporter.warning(
2818 'Title underline too short.',
2819 nodes.literal_block(blocktext, blocktext),
2820 line=lineno)
2821 messages.append(msg)
2822 if not self.state_machine.match_titles:
2823 blocktext = context[0] + '\n' + self.state_machine.line
2824 # We need get_source_and_line() here to report correctly
2825 src, srcline = self.state_machine.get_source_and_line()
2826 # TODO: why is abs_line_number() == srcline+1
2827 # if the error is in a table (try with test_tables.py)?
2828 # print("get_source_and_line", srcline)
2829 # print("abs_line_number", self.state_machine.abs_line_number())
2830 msg = self.reporter.error(
2831 'Unexpected section title.',
2832 nodes.literal_block(blocktext, blocktext),
2833 source=src, line=srcline)
2834 self.parent += messages
2835 self.parent += msg
2836 return [], next_state, []
2837 style = underline[0]
2838 context[:] = []
2839 self.section(title, source, style, lineno - 1, messages)
2840 return [], next_state, []
2841
2842 def text(self, match, context, next_state):
2843 """Paragraph."""
2844 startline = self.state_machine.abs_line_number() - 1
2845 msg = None
2846 try:
2847 block = self.state_machine.get_text_block(flush_left=True)
2848 except statemachine.UnexpectedIndentationError as err:
2849 block, src, srcline = err.args
2850 msg = self.reporter.error('Unexpected indentation.',
2851 source=src, line=srcline)
2852 lines = context + list(block)
2853 paragraph, literalnext = self.paragraph(lines, startline)
2854 self.parent += paragraph
2855 self.parent += msg
2856 if literalnext:
2857 try:
2858 self.state_machine.next_line()
2859 except EOFError:
2860 pass
2861 self.parent += self.literal_block()
2862 return [], next_state, []
2863
2864 def literal_block(self):
2865 """Return a list of nodes."""
2866 (indented, indent, offset, blank_finish
2867 ) = self.state_machine.get_indented()
2868 while indented and not indented[-1].strip():
2869 indented.trim_end()
2870 if not indented:
2871 return self.quoted_literal_block()
2872 data = '\n'.join(indented)
2873 literal_block = nodes.literal_block(data, data)
2874 (literal_block.source,
2875 literal_block.line) = self.state_machine.get_source_and_line(offset+1)
2876 nodelist = [literal_block]
2877 if not blank_finish:
2878 nodelist.append(self.unindent_warning('Literal block'))
2879 return nodelist
2880
2881 def quoted_literal_block(self):
2882 abs_line_offset = self.state_machine.abs_line_offset()
2883 offset = self.state_machine.line_offset
2884 parent_node = nodes.Element()
2885 new_abs_offset = self.nested_parse(
2886 self.state_machine.input_lines[offset:],
2887 input_offset=abs_line_offset, node=parent_node, match_titles=False,
2888 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2889 'initial_state': 'QuotedLiteralBlock'})
2890 self.goto_line(new_abs_offset)
2891 return parent_node.children
2892
2893 def definition_list_item(self, termline):
2894 # the parser is already on the second (indented) line:
2895 dd_lineno = self.state_machine.abs_line_number()
2896 dt_lineno = dd_lineno - 1
2897 (indented, indent, line_offset, blank_finish
2898 ) = self.state_machine.get_indented()
2899 dl_item = nodes.definition_list_item(
2900 '\n'.join(termline + list(indented)))
2901 (dl_item.source,
2902 dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
2903 dt_nodes, messages = self.term(termline, dt_lineno)
2904 dl_item += dt_nodes
2905 dd = nodes.definition('', *messages)
2906 dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
2907 dl_item += dd
2908 if termline[0][-2:] == '::':
2909 dd += self.reporter.info(
2910 'Blank line missing before literal block (after the "::")? '
2911 'Interpreted as a definition list item.',
2912 line=dd_lineno)
2913 # TODO: drop a definition if it is an empty comment to allow
2914 # definition list items with several terms?
2915 # https://sourceforge.net/p/docutils/feature-requests/60/
2916 self.nested_parse(indented, input_offset=line_offset, node=dd)
2917 return dl_item, blank_finish
2918
2919 classifier_delimiter = re.compile(' +: +')
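# Illustrative term line: "term : classifier one : classifier two" is
# split on " : " by term() below, producing a term node for "term" and
# one classifier node per remaining part.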
2920
2921 def term(self, lines, lineno):
2922 """Return a definition_list's term and optional classifiers."""
2923 assert len(lines) == 1
2924 text_nodes, messages = self.inline_text(lines[0], lineno)
2925 dt = nodes.term(lines[0])
2926 dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
2927 node_list = [dt]
2928 for i in range(len(text_nodes)):
2929 node = text_nodes[i]
2930 if isinstance(node, nodes.Text):
2931 parts = self.classifier_delimiter.split(node)
2932 if len(parts) == 1:
2933 node_list[-1] += node
2934 else:
2935 text = parts[0].rstrip()
2936 textnode = nodes.Text(text)
2937 node_list[-1] += textnode
2938 node_list += [nodes.classifier(unescape(part, True), part)
2939 for part in parts[1:]]
2940 else:
2941 node_list[-1] += node
2942 return node_list, messages
2943
2944
2945class SpecializedText(Text):
2946
2947 """
2948 Superclass for second and subsequent lines of Text-variants.
2949
2950 All transition methods are disabled. Override individual methods in
2951 subclasses to re-enable.
2952 """
2953
2954 def eof(self, context):
2955 """Incomplete construct."""
2956 return []
2957
2958 def invalid_input(self, match=None, context=None, next_state=None):
2959 """Not a compound element member. Abort this state machine."""
2960 raise EOFError
2961
2962 blank = invalid_input
2963 indent = invalid_input
2964 underline = invalid_input
2965 text = invalid_input
2966
2967
2968class Definition(SpecializedText):
2969
2970 """Second line of potential definition_list_item."""
2971
2972 def eof(self, context):
2973 """Not a definition."""
2974 self.state_machine.previous_line(2) # so parent SM can reassess
2975 return []
2976
2977 def indent(self, match, context, next_state):
2978 """Definition list item."""
2979 dl_item, blank_finish = self.definition_list_item(context)
2980 self.parent += dl_item
2981 self.blank_finish = blank_finish
2982 return [], 'DefinitionList', []
2983
2984
2985class Line(SpecializedText):
2986
2987 """
2988 Second line of over- & underlined section title or transition marker.
2989 """
2990
2991 eofcheck = 1 # ignored, will be removed in Docutils 2.0.
2992
2993 def eof(self, context):
2994 """Transition marker at end of section or document."""
2995 marker = context[0].strip()
2996 if len(marker) < 4:
2997 self.state_correction(context)
2998 src, srcline = self.state_machine.get_source_and_line()
2999 # lineno = self.state_machine.abs_line_number() - 1
3000 transition = nodes.transition(rawsource=context[0])
3001 transition.source = src
3002 transition.line = srcline - 1
3003 # transition.line = lineno
3004 self.parent += transition
3005 return []
3006
3007 def blank(self, match, context, next_state):
3008 """Transition marker."""
3009 src, srcline = self.state_machine.get_source_and_line()
3010 marker = context[0].strip()
3011 if len(marker) < 4:
3012 self.state_correction(context)
3013 transition = nodes.transition(rawsource=marker)
3014 transition.source = src
3015 transition.line = srcline - 1
3016 self.parent += transition
3017 return [], 'Body', []
3018
3019 def text(self, match, context, next_state):
3020 """Potential over- & underlined title."""
3021 lineno = self.state_machine.abs_line_number() - 1
3022 overline = context[0]
3023 title = match.string
3024 underline = ''
3025 try:
3026 underline = self.state_machine.next_line()
3027 except EOFError:
3028 blocktext = overline + '\n' + title
3029 if len(overline.rstrip()) < 4:
3030 self.short_overline(context, blocktext, lineno, 2)
3031 else:
3032 msg = self.reporter.error(
3033 'Incomplete section title.',
3034 nodes.literal_block(blocktext, blocktext),
3035 line=lineno)
3036 self.parent += msg
3037 return [], 'Body', []
3038 source = '%s\n%s\n%s' % (overline, title, underline)
3039 overline = overline.rstrip()
3040 underline = underline.rstrip()
3041 if not self.transitions['underline'][0].match(underline):
3042 blocktext = overline + '\n' + title + '\n' + underline
3043 if len(overline.rstrip()) < 4:
3044 self.short_overline(context, blocktext, lineno, 2)
3045 else:
3046 msg = self.reporter.error(
3047 'Missing matching underline for section title overline.',
3048 nodes.literal_block(source, source),
3049 line=lineno)
3050 self.parent += msg
3051 return [], 'Body', []
3052 elif overline != underline:
3053 blocktext = overline + '\n' + title + '\n' + underline
3054 if len(overline.rstrip()) < 4:
3055 self.short_overline(context, blocktext, lineno, 2)
3056 else:
3057 msg = self.reporter.error(
3058 'Title overline & underline mismatch.',
3059 nodes.literal_block(source, source),
3060 line=lineno)
3061 self.parent += msg
3062 return [], 'Body', []
3063 title = title.rstrip()
3064 messages = []
3065 if column_width(title) > len(overline):
3066 blocktext = overline + '\n' + title + '\n' + underline
3067 if len(overline.rstrip()) < 4:
3068 self.short_overline(context, blocktext, lineno, 2)
3069 else:
3070 msg = self.reporter.warning(
3071 'Title overline too short.',
3072 nodes.literal_block(source, source),
3073 line=lineno)
3074 messages.append(msg)
3075 style = (overline[0], underline[0])
3076 self.section(title.lstrip(), source, style, lineno + 1, messages)
3077 return [], 'Body', []
3078
3079 indent = text # indented title
3080
3081 def underline(self, match, context, next_state):
3082 overline = context[0]
3083 blocktext = overline + '\n' + self.state_machine.line
3084 lineno = self.state_machine.abs_line_number() - 1
3085 if len(overline.rstrip()) < 4:
3086 self.short_overline(context, blocktext, lineno, 1)
3087 msg = self.reporter.error(
3088 'Invalid section title or transition marker.',
3089 nodes.literal_block(blocktext, blocktext),
3090 line=lineno)
3091 self.parent += msg
3092 return [], 'Body', []
3093
3094 def short_overline(self, context, blocktext, lineno, lines=1) -> None:
3095 msg = self.reporter.info(
3096 'Possible incomplete section title.\nTreating the overline as '
3097 "ordinary text because it's so short.",
3098 line=lineno)
3099 self.parent += msg
3100 self.state_correction(context, lines)
3101
3102 def state_correction(self, context, lines=1):
3103 self.state_machine.previous_line(lines)
3104 context[:] = []
3105 raise statemachine.StateCorrection('Body', 'text')
3106
3107
3108class QuotedLiteralBlock(RSTState):
3109
3110 """
3111 Nested parse handler for quoted (unindented) literal blocks.
3112
3113 Special-purpose. Not for inclusion in `state_classes`.
3114 """
3115
3116 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
3117 'text': r''}
3118 initial_transitions = ('initial_quoted', 'text')
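# Illustrative input (hypothetical): after a paragraph ending in "::", the
# unindented lines "> line one" and "> line two" form a quoted literal
# block; initial_quoted() fixes the quote character ('>') from the first
# line and quoted() requires each following line to start with the same
# character.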
3119
3120 def __init__(self, state_machine, debug=False) -> None:
3121 RSTState.__init__(self, state_machine, debug)
3122 self.messages = []
3123 self.initial_lineno = None
3124
3125 def blank(self, match, context, next_state):
3126 if context:
3127 raise EOFError
3128 else:
3129 return context, next_state, []
3130
3131 def eof(self, context):
3132 if context:
3133 src, srcline = self.state_machine.get_source_and_line(
3134 self.initial_lineno)
3135 text = '\n'.join(context)
3136 literal_block = nodes.literal_block(text, text)
3137 literal_block.source = src
3138 literal_block.line = srcline
3139 self.parent += literal_block
3140 else:
3141 self.parent += self.reporter.warning(
3142 'Literal block expected; none found.',
3143 line=self.state_machine.abs_line_number()
3144 ) # src not available, statemachine.input_lines is empty
3145 self.state_machine.previous_line()
3146 self.parent += self.messages
3147 return []
3148
3149 def indent(self, match, context, next_state):
3150 assert context, ('QuotedLiteralBlock.indent: context should not '
3151 'be empty!')
3152 self.messages.append(
3153 self.reporter.error('Unexpected indentation.',
3154 line=self.state_machine.abs_line_number()))
3155 self.state_machine.previous_line()
3156 raise EOFError
3157
3158 def initial_quoted(self, match, context, next_state):
3159 """Match arbitrary quote character on the first line only."""
3160 self.remove_transition('initial_quoted')
3161 quote = match.string[0]
3162 pattern = re.compile(re.escape(quote))
3163 # New transition matches consistent quotes only:
3164 self.add_transition('quoted',
3165 (pattern, self.quoted, self.__class__.__name__))
3166 self.initial_lineno = self.state_machine.abs_line_number()
3167 return [match.string], next_state, []
3168
3169 def quoted(self, match, context, next_state):
3170 """Match consistent quotes on subsequent lines."""
3171 context.append(match.string)
3172 return context, next_state, []
3173
3174 def text(self, match, context, next_state):
3175 if context:
3176 self.messages.append(
3177 self.reporter.error('Inconsistent literal block quoting.',
3178 line=self.state_machine.abs_line_number()))
3179 self.state_machine.previous_line()
3180 raise EOFError
3181
3182
3183state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
3184 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
3185 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
3186"""Standard set of State classes used to start `RSTStateMachine`."""