1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
6This is the ``docutils.parsers.rst.states`` module, the core of
7the reStructuredText parser. It defines the following:
8
9:Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
    - `BulletList`: Second and subsequent bullet_list list_items.
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: obsolete, use `types.SimpleNamespace`.
30
31:Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
35
36:Functions:
    - `escape2null()`: Return a string with escape-backslashes converted
      to nulls.
    - `unescape()`: Return a string with nulls removed or restored to
      backslashes.
39
40:Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
42
43Parser Overview
44===============
45
46The reStructuredText parser is implemented as a recursive state machine,
47examining its input one line at a time. To understand how the parser works,
48please first become familiar with the `docutils.statemachine` module. In the
49description below, references are made to classes defined in this module;
50please see the individual classes for details.
51
52Parsing proceeds as follows:
53
541. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
58
592. The method associated with the matched transition pattern is called.
60
61 A. Some transition methods are self-contained, appending elements to the
      document tree (e.g., `Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
65
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
70
71 - In the case of lists and explicit markup, a one-off state machine is
        created and run to parse the contents of the first item.
73
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
83
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
86
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
90
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
93
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
97
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
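
For orientation, this machinery is normally driven through the public
`docutils.parsers.rst.Parser` class rather than by instantiating
`RSTStateMachine` directly.  A minimal sketch, using the standard
`docutils.frontend` and `docutils.utils` helpers to supply settings and an
empty document::

    from docutils.frontend import get_default_settings
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    parser = Parser()
    settings = get_default_settings(Parser)
    document = new_document('<sketch>', settings)
    parser.parse('A paragraph with *emphasis*.', document)
    # `document` now holds the node tree built by `RSTStateMachine`,
    # the `Body` family of states, and `Inliner`.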
101"""
102
103from __future__ import annotations
104
105__docformat__ = 'reStructuredText'
106
107import re
108from types import FunctionType, MethodType
109from types import SimpleNamespace as Struct
110
111from docutils import nodes, statemachine, utils
112from docutils import ApplicationError, DataError
113from docutils.statemachine import StateMachineWS, StateWS
114from docutils.nodes import fully_normalize_name as normalize_name
115from docutils.nodes import unescape, whitespace_normalize_name
116import docutils.parsers.rst
117from docutils.parsers.rst import directives, languages, tableparser, roles
118from docutils.utils import escape2null, column_width
119from docutils.utils import punctuation_chars, urischemes
120from docutils.utils import split_escaped_whitespace
121from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
122 RomanNumeral)
123
124
125class MarkupError(DataError): pass
126class UnknownInterpretedRoleError(DataError): pass
127class InterpretedRoleNotImplementedError(DataError): pass
128class ParserError(ApplicationError): pass
129class MarkupMismatch(Exception): pass
130
131
132class RSTStateMachine(StateMachineWS):
133
134 """
135 reStructuredText's master StateMachine.
136
137 The entry point to reStructuredText parsing is the `run()` method.
138 """
139
140 def run(self, input_lines, document, input_offset=0, match_titles=True,
141 inliner=None) -> None:
142 """
143 Parse `input_lines` and modify the `document` node in place.
144
145 Extend `StateMachineWS.run()`: set up parse-global data and
146 run the StateMachine.
147 """
148 self.language = languages.get_language(
149 document.settings.language_code, document.reporter)
150 self.match_titles = match_titles
151 if inliner is None:
152 inliner = Inliner()
153 inliner.init_customizations(document.settings)
154 self.memo = Struct(document=document,
155 reporter=document.reporter,
156 language=self.language,
157 title_styles=[],
158 section_level=0, # ignored, to be removed in 2.0
159 section_bubble_up_kludge=False, # ignored, ""
160 inliner=inliner)
161 self.document = document
162 self.attach_observer(document.note_source)
163 self.reporter = self.memo.reporter
164 self.node = document
165 results = StateMachineWS.run(self, input_lines, input_offset,
166 input_source=document['source'])
167 assert results == [], 'RSTStateMachine.run() results should be empty!'
168 self.node = self.memo = None # remove unneeded references
169
170
171class NestedStateMachine(StateMachineWS):
172
173 """
174 StateMachine run from within other StateMachine runs, to parse nested
175 document structures.
176 """
177
178 def run(self, input_lines, input_offset, memo, node, match_titles=True):
179 """
        Parse `input_lines` and populate the given `node`.
181
182 Extend `StateMachineWS.run()`: set up document-wide data.
183 """
184 self.match_titles = match_titles
185 self.memo = memo
186 self.document = memo.document
187 self.attach_observer(self.document.note_source)
188 self.reporter = memo.reporter
189 self.language = memo.language
190 self.node = node
191 results = StateMachineWS.run(self, input_lines, input_offset)
192 assert results == [], ('NestedStateMachine.run() results should be '
193 'empty!')
194 return results
195
196
197class RSTState(StateWS):
198
199 """
200 reStructuredText State superclass.
201
202 Contains methods used by all State subclasses.
203 """
204
205 nested_sm = NestedStateMachine
206 nested_sm_cache = []
207
208 def __init__(self, state_machine, debug=False) -> None:
209 self.nested_sm_kwargs = {'state_classes': state_classes,
210 'initial_state': 'Body'}
211 StateWS.__init__(self, state_machine, debug)
212
213 def runtime_init(self) -> None:
214 StateWS.runtime_init(self)
215 memo = self.state_machine.memo
216 self.memo = memo
217 self.reporter = memo.reporter
218 self.inliner = memo.inliner
219 self.document = memo.document
220 self.parent = self.state_machine.node
221 # enable the reporter to determine source and source-line
222 if not hasattr(self.reporter, 'get_source_and_line'):
223 self.reporter.get_source_and_line = self.state_machine.get_source_and_line # noqa:E501
224
225 def goto_line(self, abs_line_offset) -> None:
226 """
227 Jump to input line `abs_line_offset`, ignoring jumps past the end.
228 """
229 try:
230 self.state_machine.goto_line(abs_line_offset)
231 except EOFError:
232 pass
233
234 def no_match(self, context, transitions):
235 """
236 Override `StateWS.no_match` to generate a system message.
237
238 This code should never be run.
239 """
240 self.reporter.severe(
241 'Internal error: no transition pattern match. State: "%s"; '
242 'transitions: %s; context: %s; current line: %r.'
243 % (self.__class__.__name__, transitions, context,
244 self.state_machine.line))
245 return context, None, []
246
247 def bof(self, context):
248 """Called at beginning of file."""
249 return [], []
250
251 def nested_parse(self, block, input_offset, node, match_titles=False,
252 state_machine_class=None, state_machine_kwargs=None):
253 """
254 Create a new StateMachine rooted at `node` and run it over the input
255 `block`.
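
        A typical use, mirroring `Body.block_quote`, hands an indented
        `StringList` and its input offset to a freshly created child node
        (a sketch; `indented` and `line_offset` come from the calling
        state)::

            node = nodes.block_quote()
            new_offset = self.nested_parse(indented, input_offset=line_offset,
                                           node=node)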
256 """
257 use_default = 0
258 if state_machine_class is None:
259 state_machine_class = self.nested_sm
260 use_default += 1
261 if state_machine_kwargs is None:
262 state_machine_kwargs = self.nested_sm_kwargs
263 use_default += 1
264 block_length = len(block)
265
266 state_machine = None
267 if use_default == 2:
268 try:
269 state_machine = self.nested_sm_cache.pop()
270 except IndexError:
271 pass
272 if not state_machine:
273 state_machine = state_machine_class(debug=self.debug,
274 **state_machine_kwargs)
275 state_machine.run(block, input_offset, memo=self.memo,
276 node=node, match_titles=match_titles)
277 if use_default == 2:
278 self.nested_sm_cache.append(state_machine)
279 else:
280 state_machine.unlink()
281 new_offset = state_machine.abs_line_offset()
282 # No `block.parent` implies disconnected -- lines aren't in sync:
283 if block.parent and (len(block) - block_length) != 0:
284 # Adjustment for block if modified in nested parse:
285 self.state_machine.next_line(len(block) - block_length)
286 return new_offset
287
288 def nested_list_parse(self, block, input_offset, node, initial_state,
289 blank_finish,
290 blank_finish_state=None,
291 extra_settings={},
292 match_titles=False,
293 state_machine_class=None,
294 state_machine_kwargs=None):
295 """
296 Create a new StateMachine rooted at `node` and run it over the input
297 `block`. Also keep track of optional intermediate blank lines and the
298 required final one.
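
        A typical call, as made by `Body.bullet` for the remainder of a
        bullet list after its first item (a sketch; `bulletlist` stands for
        the already appended `nodes.bullet_list`)::

            new_offset, blank_finish = self.nested_list_parse(
                self.state_machine.input_lines[offset:],
                input_offset=self.state_machine.abs_line_offset() + 1,
                node=bulletlist, initial_state='BulletList',
                blank_finish=blank_finish)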
299 """
300 if state_machine_class is None:
301 state_machine_class = self.nested_sm
302 if state_machine_kwargs is None:
303 state_machine_kwargs = self.nested_sm_kwargs.copy()
304 state_machine_kwargs['initial_state'] = initial_state
305 state_machine = state_machine_class(debug=self.debug,
306 **state_machine_kwargs)
307 if blank_finish_state is None:
308 blank_finish_state = initial_state
309 state_machine.states[blank_finish_state].blank_finish = blank_finish
310 for key, value in extra_settings.items():
311 setattr(state_machine.states[initial_state], key, value)
312 state_machine.run(block, input_offset, memo=self.memo,
313 node=node, match_titles=match_titles)
314 blank_finish = state_machine.states[blank_finish_state].blank_finish
315 state_machine.unlink()
316 return state_machine.abs_line_offset(), blank_finish
317
318 def section(self, title, source, style, lineno, messages) -> None:
319 """Check for a valid subsection and create one if it checks out."""
320 if self.check_subsection(source, style, lineno):
321 self.new_subsection(title, lineno, messages)
322
323 def check_subsection(self, source, style, lineno) -> bool:
324 """
325 Check for a valid subsection header. Update section data in `memo`.
326
327 When a new section is reached that isn't a subsection of the current
328 section, set `self.parent` to the new section's parent section
329 (or the document if the new section is a top-level section).
330 """
331 title_styles = self.memo.title_styles
332 parent_sections = self.parent.section_hierarchy()
333 # current section level: (0 document, 1 section, 2 subsection, ...)
334 mylevel = len(parent_sections)
335 # Determine the level of the new section:
336 try: # check for existing title style
337 level = title_styles.index(style) + 1
338 except ValueError: # new title style
339 title_styles.append(style)
340 level = len(title_styles)
341 # The new level must not be deeper than an immediate child
342 # of the current level:
343 if level > mylevel + 1:
344 styles = " ".join("/".join(s for s in style)
345 for style in title_styles)
346 self.parent += self.reporter.severe(
347 'Inconsistent title style:'
348 f' skip from level {mylevel} to {level}.',
349 nodes.literal_block('', source),
350 nodes.paragraph('', f'Established title styles: {styles}'),
351 line=lineno)
352 return False
353 # Update parent state:
354 self.memo.section_level = level
355 if level <= mylevel:
356 # new section is sibling or higher up in the section hierarchy
357 self.parent = parent_sections[level-1].parent
358 return True
359
360 def title_inconsistent(self, sourcetext, lineno):
361 # Ignored. Will be removed in Docutils 2.0.
362 error = self.reporter.severe(
363 'Title level inconsistent:', nodes.literal_block('', sourcetext),
364 line=lineno)
365 return error
366
367 def new_subsection(self, title, lineno, messages):
368 """Append new subsection to document tree."""
369 section_node = nodes.section()
370 self.parent += section_node
371 textnodes, title_messages = self.inline_text(title, lineno)
372 titlenode = nodes.title(title, '', *textnodes)
373 name = normalize_name(titlenode.astext())
374 section_node['names'].append(name)
375 section_node += titlenode
376 section_node += messages
377 section_node += title_messages
378 self.document.note_implicit_target(section_node, section_node)
379 # Update state:
380 self.state_machine.node = section_node
381 # Also update the ".parent" attribute in all states.
382 # This is a bit violent, but the state classes copy their .parent from
383 # state_machine.node on creation, so we need to update them. We could
384 # also remove RSTState.parent entirely and replace references to it
385 # with statemachine.node, but that might break code downstream of
386 # docutils.
387 for s in self.state_machine.states.values():
388 s.parent = section_node
389
390 def paragraph(self, lines, lineno):
391 """
392 Return a list (paragraph & messages) & a boolean: literal_block next?
393 """
        data = '\n'.join(lines).rstrip()
        if re.search(r'(?<!\\)(\\\\)*::$', data):
            # Paragraph ends with an unescaped "::" -> a literal block follows.
            if len(data) == 2:
                # The paragraph is just "::"; no paragraph text remains.
                return [], 1
            elif data[-3] in ' \n':
                # "text ::" -> drop the marker and any preceding whitespace.
                text = data[:-3].rstrip()
            else:
                # "text::" -> keep a single ":" at the end of the paragraph.
                text = data[:-1]
            literalnext = 1
        else:
            text = data
            literalnext = 0
406 textnodes, messages = self.inline_text(text, lineno)
407 p = nodes.paragraph(data, '', *textnodes)
408 p.source, p.line = self.state_machine.get_source_and_line(lineno)
409 return [p] + messages, literalnext
410
411 def inline_text(self, text, lineno):
412 """
413 Return 2 lists: nodes (text and inline elements), and system_messages.
414 """
415 nodes, messages = self.inliner.parse(text, lineno,
416 self.memo, self.parent)
417 return nodes, messages
418
419 def unindent_warning(self, node_name):
420 # the actual problem is one line below the current line
421 lineno = self.state_machine.abs_line_number() + 1
422 return self.reporter.warning('%s ends without a blank line; '
423 'unexpected unindent.' % node_name,
424 line=lineno)
425
426
427def build_regexp(definition, compile_patterns=True):
428 """
429 Build, compile and return a regular expression based on `definition`.
430
431 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
432 where "parts" is a list of regular expressions and/or regular
433 expression definitions to be joined into an or-group.
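
    For example, a nested definition expands into nested named or-groups
    (names and patterns here are purely illustrative)::

        build_regexp(('out', '^', '$',
                      ['foo', ('in', '<', '>', ['bar', 'baz'])]),
                     compile_patterns=False)
        # -> '^(?P<out>foo|<(?P<in>bar|baz)>)$'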
434 """
435 name, prefix, suffix, parts = definition
436 part_strings = []
437 for part in parts:
438 if isinstance(part, tuple):
439 part_strings.append(build_regexp(part, None))
440 else:
441 part_strings.append(part)
442 or_group = '|'.join(part_strings)
443 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
444 if compile_patterns:
445 return re.compile(regexp)
446 else:
447 return regexp
448
449
450class Inliner:
451
452 """
453 Parse inline markup; call the `parse()` method.
454 """
455
456 def __init__(self) -> None:
457 self.implicit_dispatch = []
458 """List of (pattern, bound method) tuples, used by
459 `self.implicit_inline`."""
460
461 def init_customizations(self, settings) -> None:
462 # lookahead and look-behind expressions for inline markup rules
463 if getattr(settings, 'character_level_inline_markup', False):
464 start_string_prefix = '(^|(?<!\x00))'
465 end_string_suffix = ''
466 else:
467 start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
468 (punctuation_chars.openers,
469 punctuation_chars.delimiters))
470 end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
471 (punctuation_chars.closing_delimiters,
472 punctuation_chars.delimiters,
473 punctuation_chars.closers))
474 args = locals().copy()
475 args.update(vars(self.__class__))
476
477 parts = ('initial_inline', start_string_prefix, '',
478 [
479 ('start', '', self.non_whitespace_after, # simple start-strings
480 [r'\*\*', # strong
481 r'\*(?!\*)', # emphasis but not strong
482 r'``', # literal
483 r'_`', # inline internal target
484 r'\|(?!\|)'] # substitution reference
485 ),
486 ('whole', '', end_string_suffix, # whole constructs
487 [ # reference name & end-string
488 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
489 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
490 [r'[0-9]+', # manually numbered
491 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?)
492 r'\*', # auto-symbol
493 r'(?P<citationlabel>%s)' % self.simplename, # citation ref
494 ]
495 )
496 ]
497 ),
498 ('backquote', # interpreted text or phrase reference
499 '(?P<role>(:%s:)?)' % self.simplename, # optional role
500 self.non_whitespace_after,
501 ['`(?!`)'] # but not literal
502 )
503 ]
504 )
505 self.start_string_prefix = start_string_prefix
506 self.end_string_suffix = end_string_suffix
507 self.parts = parts
508
509 self.patterns = Struct(
510 initial=build_regexp(parts),
511 emphasis=re.compile(self.non_whitespace_escape_before
512 + r'(\*)' + end_string_suffix),
513 strong=re.compile(self.non_whitespace_escape_before
514 + r'(\*\*)' + end_string_suffix),
515 interpreted_or_phrase_ref=re.compile(
516 r"""
517 %(non_unescaped_whitespace_escape_before)s
518 (
519 `
520 (?P<suffix>
521 (?P<role>:%(simplename)s:)?
522 (?P<refend>__?)?
523 )
524 )
525 %(end_string_suffix)s
526 """ % args, re.VERBOSE),
527 embedded_link=re.compile(
528 r"""
529 (
530 (?:[ \n]+|^) # spaces or beginning of line/string
531 < # open bracket
532 %(non_whitespace_after)s
533 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
534 %(non_whitespace_escape_before)s
535 > # close bracket
536 )
537 $ # end of string
538 """ % args, re.VERBOSE),
539 literal=re.compile(self.non_whitespace_before + '(``)'
540 + end_string_suffix),
541 target=re.compile(self.non_whitespace_escape_before
542 + r'(`)' + end_string_suffix),
543 substitution_ref=re.compile(self.non_whitespace_escape_before
544 + r'(\|_{0,2})'
545 + end_string_suffix),
546 email=re.compile(self.email_pattern % args + '$',
547 re.VERBOSE),
548 uri=re.compile(
549 (r"""
550 %(start_string_prefix)s
551 (?P<whole>
552 (?P<absolute> # absolute URI
553 (?P<scheme> # scheme (http, ftp, mailto)
554 [a-zA-Z][a-zA-Z0-9.+-]*
555 )
556 :
557 (
558 ( # either:
559 (//?)? # hierarchical URI
560 %(uric)s* # URI characters
561 %(uri_end)s # final URI char
562 )
563 ( # optional query
564 \?%(uric)s*
565 %(uri_end)s
566 )?
567 ( # optional fragment
568 \#%(uric)s*
569 %(uri_end)s
570 )?
571 )
572 )
573 | # *OR*
574 (?P<email> # email address
575 """ + self.email_pattern + r"""
576 )
577 )
578 %(end_string_suffix)s
579 """) % args, re.VERBOSE),
580 pep=re.compile(
581 r"""
582 %(start_string_prefix)s
583 (
584 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
585 |
586 (PEP\s+(?P<pepnum2>\d+)) # reference by name
587 )
588 %(end_string_suffix)s""" % args, re.VERBOSE),
589 rfc=re.compile(
590 r"""
591 %(start_string_prefix)s
592 (RFC(-|\s+)?(?P<rfcnum>\d+))
593 %(end_string_suffix)s""" % args, re.VERBOSE))
594
595 self.implicit_dispatch.append((self.patterns.uri,
596 self.standalone_uri))
597 if settings.pep_references:
598 self.implicit_dispatch.append((self.patterns.pep,
599 self.pep_reference))
600 if settings.rfc_references:
601 self.implicit_dispatch.append((self.patterns.rfc,
602 self.rfc_reference))
603
604 def parse(self, text, lineno, memo, parent):
605 # Needs to be refactored for nested inline markup.
606 # Add nested_parse() method?
607 """
608 Return 2 lists: nodes (text and inline elements), and system_messages.
609
610 Using `self.patterns.initial`, a pattern which matches start-strings
611 (emphasis, strong, interpreted, phrase reference, literal,
612 substitution reference, and inline target) and complete constructs
613 (simple reference, footnote reference), search for a candidate. When
614 one is found, check for validity (e.g., not a quoted '*' character).
615 If valid, search for the corresponding end string if applicable, and
616 check it for validity. If not found or invalid, generate a warning
617 and ignore the start-string. Implicit inline markup (e.g. standalone
618 URIs) is found last.
619
620 :text: source string
621 :lineno: absolute line number, cf. `statemachine.get_source_and_line()`
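
        A sketch of the call as made by `RSTState.inline_text()` (the state
        supplies `memo` and `parent`)::

            nodes, messages = self.inliner.parse(text, lineno,
                                                 self.memo, self.parent)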
622 """
623 self.reporter = memo.reporter
624 self.document = memo.document
625 self.language = memo.language
626 self.parent = parent
627 pattern_search = self.patterns.initial.search
628 dispatch = self.dispatch
629 remaining = escape2null(text)
630 processed = []
631 unprocessed = []
632 messages = []
633 while remaining:
634 match = pattern_search(remaining)
635 if match:
636 groups = match.groupdict()
637 method = dispatch[groups['start'] or groups['backquote']
638 or groups['refend'] or groups['fnend']]
639 before, inlines, remaining, sysmessages = method(self, match,
640 lineno)
641 unprocessed.append(before)
642 messages += sysmessages
643 if inlines:
644 processed += self.implicit_inline(''.join(unprocessed),
645 lineno)
646 processed += inlines
647 unprocessed = []
648 else:
649 break
650 remaining = ''.join(unprocessed) + remaining
651 if remaining:
652 processed += self.implicit_inline(remaining, lineno)
653 return processed, messages
654
655 # Inline object recognition
656 # -------------------------
657 # See also init_customizations().
658 non_whitespace_before = r'(?<!\s)'
659 non_whitespace_escape_before = r'(?<![\s\x00])'
660 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
661 non_whitespace_after = r'(?!\s)'
662 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
663 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
664 # Valid URI characters (see RFC 2396 & RFC 2732);
665 # final \x00 allows backslash escapes in URIs:
666 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
667 # Delimiter indicating the end of a URI (not part of the URI):
668 uri_end_delim = r"""[>]"""
669 # Last URI character; same as uric but no punctuation:
670 urilast = r"""[_~*/=+a-zA-Z0-9]"""
671 # End of a URI (either 'urilast' or 'uric followed by a
672 # uri_end_delim'):
673 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
674 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
675 email_pattern = r"""
676 %(emailc)s+(?:\.%(emailc)s+)* # name
677 (?<!\x00)@ # at
678 %(emailc)s+(?:\.%(emailc)s*)* # host
679 %(uri_end)s # final URI char
680 """
681
682 def quoted_start(self, match):
683 """Test if inline markup start-string is 'quoted'.
684
685 'Quoted' in this context means the start-string is enclosed in a pair
686 of matching opening/closing delimiters (not necessarily quotes)
687 or at the end of the match.
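
        For example, the asterisk in ``'*'`` or ``(*)`` is "quoted" by the
        surrounding matching delimiters and is not treated as an emphasis
        start-string.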
688 """
689 string = match.string
690 start = match.start()
691 if start == 0: # start-string at beginning of text
692 return False
693 prestart = string[start - 1]
694 try:
695 poststart = string[match.end()]
696 except IndexError: # start-string at end of text
697 return True # not "quoted" but no markup start-string either
698 return punctuation_chars.match_chars(prestart, poststart)
699
700 def inline_obj(self, match, lineno, end_pattern, nodeclass,
701 restore_backslashes=False):
702 string = match.string
703 matchstart = match.start('start')
704 matchend = match.end('start')
705 if self.quoted_start(match):
706 return string[:matchend], [], string[matchend:], [], ''
707 endmatch = end_pattern.search(string[matchend:])
708 if endmatch and endmatch.start(1): # 1 or more chars
709 text = endmatch.string[:endmatch.start(1)]
710 if restore_backslashes:
711 text = unescape(text, True)
712 textend = matchend + endmatch.end(1)
713 rawsource = unescape(string[matchstart:textend], True)
714 node = nodeclass(rawsource, text)
715 return (string[:matchstart], [node],
716 string[textend:], [], endmatch.group(1))
717 msg = self.reporter.warning(
718 'Inline %s start-string without end-string.'
719 % nodeclass.__name__, line=lineno)
720 text = unescape(string[matchstart:matchend], True)
721 prb = self.problematic(text, text, msg)
722 return string[:matchstart], [prb], string[matchend:], [msg], ''
723
724 def problematic(self, text, rawsource, message):
725 msgid = self.document.set_id(message, self.parent)
726 problematic = nodes.problematic(rawsource, text, refid=msgid)
727 prbid = self.document.set_id(problematic)
728 message.add_backref(prbid)
729 return problematic
730
731 def emphasis(self, match, lineno):
732 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
733 match, lineno, self.patterns.emphasis, nodes.emphasis)
734 return before, inlines, remaining, sysmessages
735
736 def strong(self, match, lineno):
737 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
738 match, lineno, self.patterns.strong, nodes.strong)
739 return before, inlines, remaining, sysmessages
740
741 def interpreted_or_phrase_ref(self, match, lineno):
742 end_pattern = self.patterns.interpreted_or_phrase_ref
743 string = match.string
744 matchstart = match.start('backquote')
745 matchend = match.end('backquote')
746 rolestart = match.start('role')
747 role = match.group('role')
748 position = ''
749 if role:
750 role = role[1:-1]
751 position = 'prefix'
752 elif self.quoted_start(match):
753 return string[:matchend], [], string[matchend:], []
754 endmatch = end_pattern.search(string[matchend:])
755 if endmatch and endmatch.start(1): # 1 or more chars
756 textend = matchend + endmatch.end()
757 if endmatch.group('role'):
758 if role:
759 msg = self.reporter.warning(
760 'Multiple roles in interpreted text (both '
761 'prefix and suffix present; only one allowed).',
762 line=lineno)
763 text = unescape(string[rolestart:textend], True)
764 prb = self.problematic(text, text, msg)
765 return string[:rolestart], [prb], string[textend:], [msg]
766 role = endmatch.group('suffix')[1:-1]
767 position = 'suffix'
768 escaped = endmatch.string[:endmatch.start(1)]
769 rawsource = unescape(string[matchstart:textend], True)
770 if rawsource[-1:] == '_':
771 if role:
772 msg = self.reporter.warning(
773 'Mismatch: both interpreted text role %s and '
774 'reference suffix.' % position, line=lineno)
775 text = unescape(string[rolestart:textend], True)
776 prb = self.problematic(text, text, msg)
777 return string[:rolestart], [prb], string[textend:], [msg]
778 return self.phrase_ref(string[:matchstart], string[textend:],
779 rawsource, escaped)
780 else:
781 rawsource = unescape(string[rolestart:textend], True)
782 nodelist, messages = self.interpreted(rawsource, escaped, role,
783 lineno)
784 return (string[:rolestart], nodelist,
785 string[textend:], messages)
786 msg = self.reporter.warning(
787 'Inline interpreted text or phrase reference start-string '
788 'without end-string.', line=lineno)
789 text = unescape(string[matchstart:matchend], True)
790 prb = self.problematic(text, text, msg)
791 return string[:matchstart], [prb], string[matchend:], [msg]
792
793 def phrase_ref(self, before, after, rawsource, escaped, text=None):
794 # `text` is ignored (since 0.16)
795 match = self.patterns.embedded_link.search(escaped)
796 if match: # embedded <URI> or <alias_>
797 text = escaped[:match.start(0)]
798 unescaped = unescape(text)
799 rawtext = unescape(text, True)
800 aliastext = match.group(2)
801 rawaliastext = unescape(aliastext, True)
802 underscore_escaped = rawaliastext.endswith(r'\_')
803 if (aliastext.endswith('_')
804 and not (underscore_escaped
805 or self.patterns.uri.match(aliastext))):
806 aliastype = 'name'
807 alias = normalize_name(unescape(aliastext[:-1]))
808 target = nodes.target(match.group(1), refname=alias)
809 target.indirect_reference_name = whitespace_normalize_name(
810 unescape(aliastext[:-1]))
811 else:
812 aliastype = 'uri'
813 # remove unescaped whitespace
814 alias_parts = split_escaped_whitespace(match.group(2))
815 alias = ' '.join(''.join(part.split())
816 for part in alias_parts)
817 alias = self.adjust_uri(unescape(alias))
818 if alias.endswith(r'\_'):
819 alias = alias[:-2] + '_'
820 target = nodes.target(match.group(1), refuri=alias)
821 target.referenced = 1
822 if not aliastext:
823 raise ApplicationError('problem with embedded link: %r'
824 % aliastext)
825 if not text:
826 text = alias
827 unescaped = unescape(text)
828 rawtext = rawaliastext
829 else:
830 text = escaped
831 unescaped = unescape(text)
832 target = None
833 rawtext = unescape(escaped, True)
834
835 refname = normalize_name(unescaped)
836 reference = nodes.reference(rawsource, text,
837 name=whitespace_normalize_name(unescaped))
838 reference[0].rawsource = rawtext
839
840 node_list = [reference]
841
842 if rawsource[-2:] == '__':
843 if target and (aliastype == 'name'):
844 reference['refname'] = alias
845 self.document.note_refname(reference)
846 # self.document.note_indirect_target(target) # required?
847 elif target and (aliastype == 'uri'):
848 reference['refuri'] = alias
849 else:
850 reference['anonymous'] = True
851 else:
852 if target:
853 target['names'].append(refname)
854 if aliastype == 'name':
855 reference['refname'] = alias
856 self.document.note_indirect_target(target)
857 self.document.note_refname(reference)
858 else:
859 reference['refuri'] = alias
860 # target.note_referenced_by(name=refname)
861 self.document.note_explicit_target(target, self.parent)
862 node_list.append(target)
863 else:
864 reference['refname'] = refname
865 self.document.note_refname(reference)
866 return before, node_list, after, []
867
868 def adjust_uri(self, uri):
869 match = self.patterns.email.match(uri)
870 if match:
871 return 'mailto:' + uri
872 else:
873 return uri
874
875 def interpreted(self, rawsource, text, role, lineno):
876 role_fn, messages = roles.role(role, self.language, lineno,
877 self.reporter)
878 if role_fn:
879 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
880 return nodes, messages + messages2
881 else:
882 msg = self.reporter.error(
883 'Unknown interpreted text role "%s".' % role,
884 line=lineno)
885 return ([self.problematic(rawsource, rawsource, msg)],
886 messages + [msg])
887
888 def literal(self, match, lineno):
889 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
890 match, lineno, self.patterns.literal, nodes.literal,
891 restore_backslashes=True)
892 return before, inlines, remaining, sysmessages
893
894 def inline_internal_target(self, match, lineno):
895 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
896 match, lineno, self.patterns.target, nodes.target)
897 if inlines and isinstance(inlines[0], nodes.target):
898 assert len(inlines) == 1
899 target = inlines[0]
900 name = normalize_name(target.astext())
901 target['names'].append(name)
902 self.document.note_explicit_target(target, self.parent)
903 return before, inlines, remaining, sysmessages
904
905 def substitution_reference(self, match, lineno):
906 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
907 match, lineno, self.patterns.substitution_ref,
908 nodes.substitution_reference)
909 if len(inlines) == 1:
910 subref_node = inlines[0]
911 if isinstance(subref_node, nodes.substitution_reference):
912 subref_text = subref_node.astext()
913 self.document.note_substitution_ref(subref_node, subref_text)
914 if endstring[-1:] == '_':
915 reference_node = nodes.reference(
916 '|%s%s' % (subref_text, endstring), '')
917 if endstring[-2:] == '__':
918 reference_node['anonymous'] = True
919 else:
920 reference_node['refname'] = normalize_name(subref_text)
921 self.document.note_refname(reference_node)
922 reference_node += subref_node
923 inlines = [reference_node]
924 return before, inlines, remaining, sysmessages
925
926 def footnote_reference(self, match, lineno):
927 """
928 Handles `nodes.footnote_reference` and `nodes.citation_reference`
929 elements.
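
        The label determines the flavour: ``[1]_`` (manually numbered),
        ``[#]_`` or ``[#label]_`` (auto-numbered), ``[*]_`` (auto-symbol),
        and ``[name]_`` (citation reference).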
930 """
931 label = match.group('footnotelabel')
932 refname = normalize_name(label)
933 string = match.string
934 before = string[:match.start('whole')]
935 remaining = string[match.end('whole'):]
936 if match.group('citationlabel'):
937 refnode = nodes.citation_reference('[%s]_' % label,
938 refname=refname)
939 refnode += nodes.Text(label)
940 self.document.note_citation_ref(refnode)
941 else:
942 refnode = nodes.footnote_reference('[%s]_' % label)
943 if refname[0] == '#':
944 refname = refname[1:]
945 refnode['auto'] = 1
946 self.document.note_autofootnote_ref(refnode)
947 elif refname == '*':
948 refname = ''
949 refnode['auto'] = '*'
950 self.document.note_symbol_footnote_ref(
951 refnode)
952 else:
953 refnode += nodes.Text(label)
954 if refname:
955 refnode['refname'] = refname
956 self.document.note_footnote_ref(refnode)
957 if utils.get_trim_footnote_ref_space(self.document.settings):
958 before = before.rstrip()
959 return before, [refnode], remaining, []
960
961 def reference(self, match, lineno, anonymous=False):
962 referencename = match.group('refname')
963 refname = normalize_name(referencename)
964 referencenode = nodes.reference(
965 referencename + match.group('refend'), referencename,
966 name=whitespace_normalize_name(referencename))
967 referencenode[0].rawsource = referencename
968 if anonymous:
969 referencenode['anonymous'] = True
970 else:
971 referencenode['refname'] = refname
972 self.document.note_refname(referencenode)
973 string = match.string
974 matchstart = match.start('whole')
975 matchend = match.end('whole')
976 return string[:matchstart], [referencenode], string[matchend:], []
977
978 def anonymous_reference(self, match, lineno):
979 return self.reference(match, lineno, anonymous=True)
980
981 def standalone_uri(self, match, lineno):
982 if (not match.group('scheme')
983 or match.group('scheme').lower() in urischemes.schemes):
984 if match.group('email'):
985 addscheme = 'mailto:'
986 else:
987 addscheme = ''
988 text = match.group('whole')
989 refuri = addscheme + unescape(text)
990 reference = nodes.reference(unescape(text, True), text,
991 refuri=refuri)
992 return [reference]
993 else: # not a valid scheme
994 raise MarkupMismatch
995
996 def pep_reference(self, match, lineno):
997 text = match.group(0)
998 if text.startswith('pep-'):
999 pepnum = int(unescape(match.group('pepnum1')))
1000 elif text.startswith('PEP'):
1001 pepnum = int(unescape(match.group('pepnum2')))
1002 else:
1003 raise MarkupMismatch
1004 ref = (self.document.settings.pep_base_url
1005 + self.document.settings.pep_file_url_template % pepnum)
1006 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1007
1008 rfc_url = 'rfc%d.html'
1009
1010 def rfc_reference(self, match, lineno):
1011 text = match.group(0)
1012 if text.startswith('RFC'):
1013 rfcnum = int(unescape(match.group('rfcnum')))
1014 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
1015 else:
1016 raise MarkupMismatch
1017 return [nodes.reference(unescape(text, True), text, refuri=ref)]
1018
1019 def implicit_inline(self, text, lineno):
1020 """
1021 Check each of the patterns in `self.implicit_dispatch` for a match,
1022 and dispatch to the stored method for the pattern. Recursively check
1023 the text before and after the match. Return a list of `nodes.Text`
1024 and inline element nodes.
1025 """
1026 if not text:
1027 return []
1028 for pattern, method in self.implicit_dispatch:
1029 match = pattern.search(text)
1030 if match:
1031 try:
1032 # Must recurse on strings before *and* after the match;
1033 # there may be multiple patterns.
1034 return (self.implicit_inline(text[:match.start()], lineno)
1035 + method(match, lineno)
1036 + self.implicit_inline(text[match.end():], lineno))
1037 except MarkupMismatch:
1038 pass
1039 return [nodes.Text(text)]
1040
1041 dispatch = {'*': emphasis,
1042 '**': strong,
1043 '`': interpreted_or_phrase_ref,
1044 '``': literal,
1045 '_`': inline_internal_target,
1046 ']_': footnote_reference,
1047 '|': substitution_reference,
1048 '_': reference,
1049 '__': anonymous_reference}
1050
1051
1052def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1053 return ord(s) - _zero
1054
1055
1056def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1057 return ord(s) - _zero
1058
1059
1060class Body(RSTState):
1061
1062 """
1063 Generic classifier of the first line of a block.
1064 """
1065
1066 double_width_pad_char = tableparser.TableParser.double_width_pad_char
1067 """Padding character for East Asian double-width text."""
1068
1069 enum = Struct()
1070 """Enumerated list parsing information."""
1071
1072 enum.formatinfo = {
1073 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1074 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1075 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1076 enum.formats = enum.formatinfo.keys()
1077 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1078 'lowerroman', 'upperroman'] # ORDERED!
1079 enum.sequencepats = {'arabic': '[0-9]+',
1080 'loweralpha': '[a-z]',
1081 'upperalpha': '[A-Z]',
1082 'lowerroman': '[ivxlcdm]+',
1083 'upperroman': '[IVXLCDM]+'}
1084 enum.converters = {'arabic': int,
1085 'loweralpha': _loweralpha_to_int,
1086 'upperalpha': _upperalpha_to_int,
1087 'lowerroman': RomanNumeral.from_string,
1088 'upperroman': RomanNumeral.from_string}
1089
1090 enum.sequenceregexps = {}
1091 for sequence in enum.sequences:
1092 enum.sequenceregexps[sequence] = re.compile(
1093 enum.sequencepats[sequence] + '$')
1094
1095 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1096 """Matches the top (& bottom) of a full table)."""
1097
1098 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1099 """Matches the top of a simple table."""
1100
1101 simple_table_border_pat = re.compile('=+[ =]*$')
1102 """Matches the bottom & header bottom of a simple table."""
1103
1104 pats = {}
1105 """Fragments of patterns used by transitions."""
1106
1107 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1108 pats['alpha'] = '[a-zA-Z]'
1109 pats['alphanum'] = '[a-zA-Z0-9]'
1110 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1111 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1112 '|%(upperroman)s|#)' % enum.sequencepats)
1113 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1114 # @@@ Loosen up the pattern? Allow Unicode?
1115 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1116 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1117 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1118 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1119
1120 for format in enum.formats:
1121 pats[format] = '(?P<%s>%s%s%s)' % (
1122 format, re.escape(enum.formatinfo[format].prefix),
1123 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1124
1125 patterns = {
1126 'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
1127 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1128 'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
1129 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1130 'doctest': r'>>>( +|$)',
1131 'line_block': r'\|( +|$)',
1132 'grid_table_top': grid_table_top_pat,
1133 'simple_table_top': simple_table_top_pat,
1134 'explicit_markup': r'\.\.( +|$)',
1135 'anonymous': r'__( +|$)',
1136 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1137 'text': r''}
1138 initial_transitions = (
1139 'bullet',
1140 'enumerator',
1141 'field_marker',
1142 'option_marker',
1143 'doctest',
1144 'line_block',
1145 'grid_table_top',
1146 'simple_table_top',
1147 'explicit_markup',
1148 'anonymous',
1149 'line',
1150 'text')
1151
1152 def indent(self, match, context, next_state):
1153 """Block quote."""
1154 (indented, indent, line_offset, blank_finish
1155 ) = self.state_machine.get_indented()
1156 elements = self.block_quote(indented, line_offset)
1157 self.parent += elements
1158 if not blank_finish:
1159 self.parent += self.unindent_warning('Block quote')
1160 return context, next_state, []
1161
1162 def block_quote(self, indented, line_offset):
1163 elements = []
1164 while indented:
1165 blockquote = nodes.block_quote(rawsource='\n'.join(indented))
1166 (blockquote.source, blockquote.line
1167 ) = self.state_machine.get_source_and_line(line_offset+1)
1168 (blockquote_lines,
1169 attribution_lines,
1170 attribution_offset,
1171 indented,
1172 new_line_offset) = self.split_attribution(indented, line_offset)
1173 self.nested_parse(blockquote_lines, line_offset, blockquote)
1174 elements.append(blockquote)
1175 if attribution_lines:
1176 attribution, messages = self.parse_attribution(
1177 attribution_lines, line_offset+attribution_offset)
1178 blockquote += attribution
1179 elements += messages
1180 line_offset = new_line_offset
1181 while indented and not indented[0]:
1182 indented = indented[1:]
1183 line_offset += 1
1184 return elements
1185
1186 # U+2014 is an em-dash:
1187 attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
1188
1189 def split_attribution(self, indented, line_offset):
1190 """
1191 Check for a block quote attribution and split it off:
1192
1193 * First line after a blank line must begin with a dash ("--", "---",
1194 em-dash; matches `self.attribution_pattern`).
1195 * Every line after that must have consistent indentation.
1196 * Attributions must be preceded by block quote content.
1197
1198 Return a tuple of: (block quote content lines, attribution lines,
1199 attribution offset, remaining indented lines, remaining lines offset).
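
        For example, in the following block quote the last two lines are
        split off as the attribution::

            This is a quoted paragraph.

            -- An Author,
               *Some Work*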
1200 """
1201 blank = None
1202 nonblank_seen = False
1203 for i in range(len(indented)):
1204 line = indented[i].rstrip()
1205 if line:
1206 if nonblank_seen and blank == i - 1: # last line blank
1207 match = self.attribution_pattern.match(line)
1208 if match:
1209 attribution_end, indent = self.check_attribution(
1210 indented, i)
1211 if attribution_end:
1212 a_lines = indented[i:attribution_end]
1213 a_lines.trim_left(match.end(), end=1)
1214 a_lines.trim_left(indent, start=1)
1215 return (indented[:i], a_lines,
1216 i, indented[attribution_end:],
1217 line_offset + attribution_end)
1218 nonblank_seen = True
1219 else:
1220 blank = i
1221 else:
1222 return indented, None, None, None, None
1223
1224 def check_attribution(self, indented, attribution_start):
1225 """
1226 Check attribution shape.
1227 Return the index past the end of the attribution, and the indent.
1228 """
1229 indent = None
1230 i = attribution_start + 1
1231 for i in range(attribution_start + 1, len(indented)):
1232 line = indented[i].rstrip()
1233 if not line:
1234 break
1235 if indent is None:
1236 indent = len(line) - len(line.lstrip())
1237 elif len(line) - len(line.lstrip()) != indent:
1238 return None, None # bad shape; not an attribution
1239 else:
1240 # return index of line after last attribution line:
1241 i += 1
1242 return i, (indent or 0)
1243
1244 def parse_attribution(self, indented, line_offset):
1245 text = '\n'.join(indented).rstrip()
1246 lineno = 1 + line_offset # line_offset is zero-based
1247 textnodes, messages = self.inline_text(text, lineno)
1248 node = nodes.attribution(text, '', *textnodes)
1249 node.source, node.line = self.state_machine.get_source_and_line(lineno)
1250 return node, messages
1251
1252 def bullet(self, match, context, next_state):
1253 """Bullet list item."""
1254 ul = nodes.bullet_list()
1255 ul.source, ul.line = self.state_machine.get_source_and_line()
1256 self.parent += ul
1257 ul['bullet'] = match.string[0]
1258 i, blank_finish = self.list_item(match.end())
1259 ul += i
1260 offset = self.state_machine.line_offset + 1 # next line
1261 new_line_offset, blank_finish = self.nested_list_parse(
1262 self.state_machine.input_lines[offset:],
1263 input_offset=self.state_machine.abs_line_offset() + 1,
1264 node=ul, initial_state='BulletList',
1265 blank_finish=blank_finish)
1266 self.goto_line(new_line_offset)
1267 if not blank_finish:
1268 self.parent += self.unindent_warning('Bullet list')
1269 return [], next_state, []
1270
1271 def list_item(self, indent):
1272 src, srcline = self.state_machine.get_source_and_line()
1273 if self.state_machine.line[indent:]:
1274 indented, line_offset, blank_finish = (
1275 self.state_machine.get_known_indented(indent))
1276 else:
1277 indented, indent, line_offset, blank_finish = (
1278 self.state_machine.get_first_known_indented(indent))
1279 listitem = nodes.list_item('\n'.join(indented))
1280 listitem.source, listitem.line = src, srcline
1281 if indented:
1282 self.nested_parse(indented, input_offset=line_offset,
1283 node=listitem)
1284 return listitem, blank_finish
1285
1286 def enumerator(self, match, context, next_state):
1287 """Enumerated List Item"""
1288 format, sequence, text, ordinal = self.parse_enumerator(match)
1289 if not self.is_enumerated_list_item(ordinal, sequence, format):
1290 raise statemachine.TransitionCorrection('text')
1291 enumlist = nodes.enumerated_list()
1292 (enumlist.source,
1293 enumlist.line) = self.state_machine.get_source_and_line()
1294 self.parent += enumlist
1295 if sequence == '#':
1296 enumlist['enumtype'] = 'arabic'
1297 else:
1298 enumlist['enumtype'] = sequence
1299 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1300 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1301 if ordinal != 1:
1302 enumlist['start'] = ordinal
1303 msg = self.reporter.info(
1304 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1305 % (text, ordinal), base_node=enumlist)
1306 self.parent += msg
1307 listitem, blank_finish = self.list_item(match.end())
1308 enumlist += listitem
1309 offset = self.state_machine.line_offset + 1 # next line
1310 newline_offset, blank_finish = self.nested_list_parse(
1311 self.state_machine.input_lines[offset:],
1312 input_offset=self.state_machine.abs_line_offset() + 1,
1313 node=enumlist, initial_state='EnumeratedList',
1314 blank_finish=blank_finish,
1315 extra_settings={'lastordinal': ordinal,
1316 'format': format,
1317 'auto': sequence == '#'})
1318 self.goto_line(newline_offset)
1319 if not blank_finish:
1320 self.parent += self.unindent_warning('Enumerated list')
1321 return [], next_state, []
1322
1323 def parse_enumerator(self, match, expected_sequence=None):
1324 """
1325 Analyze an enumerator and return the results.
1326
1327 :Return:
1328 - the enumerator format ('period', 'parens', or 'rparen'),
1329 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1330 - the text of the enumerator, stripped of formatting, and
1331 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1332 ``None`` is returned for invalid enumerator text).
1333
1334 The enumerator format has already been determined by the regular
1335 expression match. If `expected_sequence` is given, that sequence is
1336 tried first. If not, we check for Roman numeral 1. This way,
1337 single-character Roman numerals (which are also alphabetical) can be
1338 matched. If no sequence has been matched, all sequences are checked in
1339 order.
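
        For example, a match for ``"(c) "`` yields
        ``('parens', 'loweralpha', 'c', 3)``, and a match for ``"iv) "``
        yields ``('rparen', 'lowerroman', 'iv', 4)``.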
1340 """
1341 groupdict = match.groupdict()
1342 sequence = ''
1343 for format in self.enum.formats:
1344 if groupdict[format]: # was this the format matched?
1345 break # yes; keep `format`
1346 else: # shouldn't happen
1347 raise ParserError('enumerator format not matched')
1348 text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
1349 : self.enum.formatinfo[format].end]
1350 if text == '#':
1351 sequence = '#'
1352 elif expected_sequence:
1353 try:
1354 if self.enum.sequenceregexps[expected_sequence].match(text):
1355 sequence = expected_sequence
1356 except KeyError: # shouldn't happen
1357 raise ParserError('unknown enumerator sequence: %s'
1358 % sequence)
1359 elif text == 'i':
1360 sequence = 'lowerroman'
1361 elif text == 'I':
1362 sequence = 'upperroman'
1363 if not sequence:
1364 for sequence in self.enum.sequences:
1365 if self.enum.sequenceregexps[sequence].match(text):
1366 break
1367 else: # shouldn't happen
1368 raise ParserError('enumerator sequence not matched')
1369 if sequence == '#':
1370 ordinal = 1
1371 else:
1372 try:
1373 ordinal = int(self.enum.converters[sequence](text))
1374 except InvalidRomanNumeralError:
1375 ordinal = None
1376 return format, sequence, text, ordinal
1377
1378 def is_enumerated_list_item(self, ordinal, sequence, format):
1379 """
1380 Check validity based on the ordinal value and the second line.
1381
1382 Return true if the ordinal is valid and the second line is blank,
1383 indented, or starts with the next enumerator or an auto-enumerator.
1384 """
1385 if ordinal is None:
1386 return None
1387 try:
1388 next_line = self.state_machine.next_line()
1389 except EOFError: # end of input lines
1390 self.state_machine.previous_line()
1391 return 1
1392 else:
1393 self.state_machine.previous_line()
1394 if not next_line[:1].strip(): # blank or indented
1395 return 1
1396 result = self.make_enumerator(ordinal + 1, sequence, format)
1397 if result:
1398 next_enumerator, auto_enumerator = result
1399 try:
1400 if next_line.startswith((next_enumerator, auto_enumerator)):
1401 return 1
1402 except TypeError:
1403 pass
1404 return None
1405
1406 def make_enumerator(self, ordinal, sequence, format):
1407 """
1408 Construct and return the next enumerated list item marker, and an
1409 auto-enumerator ("#" instead of the regular enumerator).
1410
1411 Return ``None`` for invalid (out of range) ordinals.
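
        For example, ``make_enumerator(3, 'loweralpha', 'parens')`` returns
        ``('(c) ', '(#) ')`` and ``make_enumerator(4, 'upperroman', 'period')``
        returns ``('IV. ', '#. ')``.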
1412 """
1413 if sequence == '#':
1414 enumerator = '#'
1415 elif sequence == 'arabic':
1416 enumerator = str(ordinal)
1417 else:
1418 if sequence.endswith('alpha'):
1419 if ordinal > 26:
1420 return None
1421 enumerator = chr(ordinal + ord('a') - 1)
1422 elif sequence.endswith('roman'):
1423 try:
1424 enumerator = RomanNumeral(ordinal).to_uppercase()
1425 except TypeError:
1426 return None
1427 else: # shouldn't happen
1428 raise ParserError('unknown enumerator sequence: "%s"'
1429 % sequence)
1430 if sequence.startswith('lower'):
1431 enumerator = enumerator.lower()
1432 elif sequence.startswith('upper'):
1433 enumerator = enumerator.upper()
1434 else: # shouldn't happen
1435 raise ParserError('unknown enumerator sequence: "%s"'
1436 % sequence)
1437 formatinfo = self.enum.formatinfo[format]
1438 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1439 + ' ')
1440 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1441 return next_enumerator, auto_enumerator
1442
1443 def field_marker(self, match, context, next_state):
1444 """Field list item."""
1445 field_list = nodes.field_list()
1446 self.parent += field_list
1447 field, blank_finish = self.field(match)
1448 field_list += field
1449 offset = self.state_machine.line_offset + 1 # next line
1450 newline_offset, blank_finish = self.nested_list_parse(
1451 self.state_machine.input_lines[offset:],
1452 input_offset=self.state_machine.abs_line_offset() + 1,
1453 node=field_list, initial_state='FieldList',
1454 blank_finish=blank_finish)
1455 self.goto_line(newline_offset)
1456 if not blank_finish:
1457 self.parent += self.unindent_warning('Field list')
1458 return [], next_state, []
1459
1460 def field(self, match):
1461 name = self.parse_field_marker(match)
1462 src, srcline = self.state_machine.get_source_and_line()
1463 lineno = self.state_machine.abs_line_number()
1464 (indented, indent, line_offset, blank_finish
1465 ) = self.state_machine.get_first_known_indented(match.end())
1466 field_node = nodes.field()
1467 field_node.source = src
1468 field_node.line = srcline
1469 name_nodes, name_messages = self.inline_text(name, lineno)
1470 field_node += nodes.field_name(name, '', *name_nodes)
1471 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1472 field_node += field_body
1473 if indented:
1474 self.parse_field_body(indented, line_offset, field_body)
1475 return field_node, blank_finish
1476
1477 def parse_field_marker(self, match):
1478 """Extract & return field name from a field marker match."""
1479 field = match.group()[1:] # strip off leading ':'
1480 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1481 return field
1482
1483 def parse_field_body(self, indented, offset, node) -> None:
1484 self.nested_parse(indented, input_offset=offset, node=node)
1485
1486 def option_marker(self, match, context, next_state):
1487 """Option list item."""
1488 optionlist = nodes.option_list()
1489 (optionlist.source, optionlist.line
1490 ) = self.state_machine.get_source_and_line()
1491 try:
1492 listitem, blank_finish = self.option_list_item(match)
1493 except MarkupError as error:
1494 # This shouldn't happen; pattern won't match.
1495 msg = self.reporter.error('Invalid option list marker: %s'
1496 % error)
1497 self.parent += msg
1498 (indented, indent, line_offset, blank_finish
1499 ) = self.state_machine.get_first_known_indented(match.end())
1500 elements = self.block_quote(indented, line_offset)
1501 self.parent += elements
1502 if not blank_finish:
1503 self.parent += self.unindent_warning('Option list')
1504 return [], next_state, []
1505 self.parent += optionlist
1506 optionlist += listitem
1507 offset = self.state_machine.line_offset + 1 # next line
1508 newline_offset, blank_finish = self.nested_list_parse(
1509 self.state_machine.input_lines[offset:],
1510 input_offset=self.state_machine.abs_line_offset() + 1,
1511 node=optionlist, initial_state='OptionList',
1512 blank_finish=blank_finish)
1513 self.goto_line(newline_offset)
1514 if not blank_finish:
1515 self.parent += self.unindent_warning('Option list')
1516 return [], next_state, []
1517
1518 def option_list_item(self, match):
1519 offset = self.state_machine.abs_line_offset()
1520 options = self.parse_option_marker(match)
1521 (indented, indent, line_offset, blank_finish
1522 ) = self.state_machine.get_first_known_indented(match.end())
1523 if not indented: # not an option list item
1524 self.goto_line(offset)
1525 raise statemachine.TransitionCorrection('text')
1526 option_group = nodes.option_group('', *options)
1527 description = nodes.description('\n'.join(indented))
1528 option_list_item = nodes.option_list_item('', option_group,
1529 description)
1530 if indented:
1531 self.nested_parse(indented, input_offset=line_offset,
1532 node=description)
1533 return option_list_item, blank_finish
1534
1535 def parse_option_marker(self, match):
1536 """
        Return a list of `nodes.option` and `nodes.option_argument` objects,
1538 parsed from an option marker match.
1539
1540 :Exception: `MarkupError` for invalid option markers.
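
        For example, the marker ``"-f FILE, --file=FILE"`` produces two
        `nodes.option` elements, each containing an `option_string` and an
        `option_argument` (with argument delimiters ``' '`` and ``'='``
        respectively).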
1541 """
1542 optlist = []
1543 # split at ", ", except inside < > (complex arguments)
1544 optionstrings = re.split(r', (?![^<]*>)', match.group().rstrip())
1545 for optionstring in optionstrings:
1546 tokens = optionstring.split()
1547 delimiter = ' '
1548 firstopt = tokens[0].split('=', 1)
1549 if len(firstopt) > 1:
1550 # "--opt=value" form
1551 tokens[:1] = firstopt
1552 delimiter = '='
1553 elif (len(tokens[0]) > 2
1554 and ((tokens[0].startswith('-')
1555 and not tokens[0].startswith('--'))
1556 or tokens[0].startswith('+'))):
1557 # "-ovalue" form
1558 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1559 delimiter = ''
1560 if len(tokens) > 1 and (tokens[1].startswith('<')
1561 and tokens[-1].endswith('>')):
1562 # "-o <value1 value2>" form; join all values into one token
1563 tokens[1:] = [' '.join(tokens[1:])]
1564 if 0 < len(tokens) <= 2:
1565 option = nodes.option(optionstring)
1566 option += nodes.option_string(tokens[0], tokens[0])
1567 if len(tokens) > 1:
1568 option += nodes.option_argument(tokens[1], tokens[1],
1569 delimiter=delimiter)
1570 optlist.append(option)
1571 else:
1572 raise MarkupError(
1573 'wrong number of option tokens (=%s), should be 1 or 2: '
1574 '"%s"' % (len(tokens), optionstring))
1575 return optlist
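    # Rough illustration of the option-marker forms handled above
    # (not exhaustive):
    #
    #   "-f FILE, --file=FILE"  ->  two option groups:
    #       option_string "-f",     option_argument "FILE" (delimiter " ")
    #       option_string "--file", option_argument "FILE" (delimiter "=")
    #   "-ofile"                ->  option_string "-o",
    #                               option_argument "file" (delimiter "")
    #   "-o <value1 value2>"    ->  the bracketed words are joined into a
    #                               single option_argument token.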
1576
1577 def doctest(self, match, context, next_state):
1578 line = self.document.current_line
1579 data = '\n'.join(self.state_machine.get_text_block())
1580 # TODO: Parse with `directives.body.CodeBlock` with
1581 # argument 'pycon' (Python Console) in Docutils 1.0.
1582 n = nodes.doctest_block(data, data)
1583 n.line = line
1584 self.parent += n
1585 return [], next_state, []
1586
1587 def line_block(self, match, context, next_state):
1588 """First line of a line block."""
1589 block = nodes.line_block()
1590 self.parent += block
1591 lineno = self.state_machine.abs_line_number()
1592 (block.source,
1593 block.line) = self.state_machine.get_source_and_line(lineno)
1594 line, messages, blank_finish = self.line_block_line(match, lineno)
1595 block += line
1596 self.parent += messages
1597 if not blank_finish:
1598 offset = self.state_machine.line_offset + 1 # next line
1599 new_line_offset, blank_finish = self.nested_list_parse(
1600 self.state_machine.input_lines[offset:],
1601 input_offset=self.state_machine.abs_line_offset() + 1,
1602 node=block, initial_state='LineBlock',
1603 blank_finish=0)
1604 self.goto_line(new_line_offset)
1605 if not blank_finish:
1606 self.parent += self.reporter.warning(
1607 'Line block ends without a blank line.',
1608 line=lineno+1)
1609 if len(block):
1610 if block[0].indent is None:
1611 block[0].indent = 0
1612 self.nest_line_block_lines(block)
1613 return [], next_state, []
1614
1615 def line_block_line(self, match, lineno):
1616 """Return one line element of a line_block."""
1617 (indented, indent, line_offset, blank_finish
1618 ) = self.state_machine.get_first_known_indented(match.end(),
1619 until_blank=True)
1620 text = '\n'.join(indented)
1621 text_nodes, messages = self.inline_text(text, lineno)
1622 line = nodes.line(text, '', *text_nodes)
1623 (line.source,
1624 line.line) = self.state_machine.get_source_and_line(lineno)
1625 if match.string.rstrip() != '|': # not empty
1626 line.indent = len(match.group(1)) - 1
1627 return line, messages, blank_finish
1628
1629 def nest_line_block_lines(self, block) -> None:
1630 for index in range(1, len(block)):
1631 if block[index].indent is None:
1632 block[index].indent = block[index - 1].indent
1633 self.nest_line_block_segment(block)
1634
1635 def nest_line_block_segment(self, block) -> None:
1636 indents = [item.indent for item in block]
1637 least = min(indents)
1638 new_items = []
1639 new_block = nodes.line_block()
1640 for item in block:
1641 if item.indent > least:
1642 new_block.append(item)
1643 else:
1644 if len(new_block):
1645 self.nest_line_block_segment(new_block)
1646 new_items.append(new_block)
1647 new_block = nodes.line_block()
1648 new_items.append(item)
1649 if len(new_block):
1650 self.nest_line_block_segment(new_block)
1651 new_items.append(new_block)
1652 block[:] = new_items
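    # Worked example (illustrative): a line block such as
    #
    #   | A one
    #   |     A two
    #   |     A three
    #   | B one
    #
    # produces line elements with indents [0, 4, 4, 0] (one less than the
    # number of spaces after the "|"); the two more-indented lines are
    # wrapped in a nested line_block between the outer lines.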
1653
1654 def grid_table_top(self, match, context, next_state):
1655 """Top border of a full table."""
1656 return self.table_top(match, context, next_state,
1657 self.isolate_grid_table,
1658 tableparser.GridTableParser)
1659
1660 def simple_table_top(self, match, context, next_state):
1661 """Top border of a simple table."""
1662 return self.table_top(match, context, next_state,
1663 self.isolate_simple_table,
1664 tableparser.SimpleTableParser)
1665
1666 def table_top(self, match, context, next_state,
1667 isolate_function, parser_class):
1668 """Top border of a generic table."""
1669 nodelist, blank_finish = self.table(isolate_function, parser_class)
1670 self.parent += nodelist
1671 if not blank_finish:
1672 msg = self.reporter.warning(
1673 'Blank line required after table.',
1674 line=self.state_machine.abs_line_number()+1)
1675 self.parent += msg
1676 return [], next_state, []
1677
1678 def table(self, isolate_function, parser_class):
1679 """Parse a table."""
1680 block, messages, blank_finish = isolate_function()
1681 if block:
1682 try:
1683 parser = parser_class()
1684 tabledata = parser.parse(block)
1685 tableline = (self.state_machine.abs_line_number() - len(block)
1686 + 1)
1687 table = self.build_table(tabledata, tableline)
1688 nodelist = [table] + messages
1689 except tableparser.TableMarkupError as err:
1690 nodelist = self.malformed_table(block, ' '.join(err.args),
1691 offset=err.offset) + messages
1692 else:
1693 nodelist = messages
1694 return nodelist, blank_finish
1695
1696 def isolate_grid_table(self):
1697 messages = []
1698 blank_finish = 1
1699 try:
1700 block = self.state_machine.get_text_block(flush_left=True)
1701 except statemachine.UnexpectedIndentationError as err:
1702 block, src, srcline = err.args
1703 messages.append(self.reporter.error('Unexpected indentation.',
1704 source=src, line=srcline))
1705 blank_finish = 0
1706 block.disconnect()
1707 # for East Asian chars:
1708 block.pad_double_width(self.double_width_pad_char)
1709 width = len(block[0].strip())
1710 for i in range(len(block)):
1711 block[i] = block[i].strip()
1712 if block[i][0] not in '+|': # check left edge
1713 blank_finish = 0
1714 self.state_machine.previous_line(len(block) - i)
1715 del block[i:]
1716 break
1717 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1718 blank_finish = 0
1719 # from second-last to third line of table:
1720 for i in range(len(block) - 2, 1, -1):
1721 if self.grid_table_top_pat.match(block[i]):
1722 self.state_machine.previous_line(len(block) - i + 1)
1723 del block[i+1:]
1724 break
1725 else:
1726 messages.extend(self.malformed_table(block))
1727 return [], messages, blank_finish
1728 for i in range(len(block)): # check right edge
1729 if len(block[i]) != width or block[i][-1] not in '+|':
1730 messages.extend(self.malformed_table(block))
1731 return [], messages, blank_finish
1732 return block, messages, blank_finish
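    # Illustrative input accepted by `isolate_grid_table` (a minimal grid
    # table):
    #
    #   +-------+-------+
    #   | cell  | cell  |
    #   +-------+-------+
    #
    # Every line must start and end with "+" or "|" and match the width of
    # the top border; if the final line is not a border line, the block is
    # cut back to the last line matching the top-border pattern.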
1733
1734 def isolate_simple_table(self):
1735 start = self.state_machine.line_offset
1736 lines = self.state_machine.input_lines
1737 limit = len(lines) - 1
1738 toplen = len(lines[start].strip())
1739 pattern_match = self.simple_table_border_pat.match
1740 found = 0
1741 found_at = None
1742 i = start + 1
1743 while i <= limit:
1744 line = lines[i]
1745 match = pattern_match(line)
1746 if match:
1747 if len(line.strip()) != toplen:
1748 self.state_machine.next_line(i - start)
1749 messages = self.malformed_table(
1750 lines[start:i+1], 'Bottom/header table border does '
1751 'not match top border.')
1752 return [], messages, i == limit or not lines[i+1].strip()
1753 found += 1
1754 found_at = i
1755 if found == 2 or i == limit or not lines[i+1].strip():
1756 end = i
1757 break
1758 i += 1
1759 else: # reached end of input_lines
1760 if found:
1761 extra = ' or no blank line after table bottom'
1762 self.state_machine.next_line(found_at - start)
1763 block = lines[start:found_at+1]
1764 else:
1765 extra = ''
1766 self.state_machine.next_line(i - start - 1)
1767 block = lines[start:]
1768 messages = self.malformed_table(
1769 block, 'No bottom table border found%s.' % extra)
1770 return [], messages, not extra
1771 self.state_machine.next_line(end - start)
1772 block = lines[start:end+1]
1773 # for East Asian chars:
1774 block.pad_double_width(self.double_width_pad_char)
1775 return block, [], end == limit or not lines[end+1].strip()
1776
1777 def malformed_table(self, block, detail='', offset=0):
1778 block.replace(self.double_width_pad_char, '')
1779 data = '\n'.join(block)
1780 message = 'Malformed table.'
1781 startline = self.state_machine.abs_line_number() - len(block) + 1
1782 if detail:
1783 message += '\n' + detail
1784 error = self.reporter.error(message, nodes.literal_block(data, data),
1785 line=startline+offset)
1786 return [error]
1787
1788 def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
1789 colwidths, headrows, bodyrows = tabledata
1790 table = nodes.table()
1791 if widths == 'auto':
1792 table['classes'] += ['colwidths-auto']
1793 elif widths: # "grid" or list of integers
1794 table['classes'] += ['colwidths-given']
1795 tgroup = nodes.tgroup(cols=len(colwidths))
1796 table += tgroup
1797 for colwidth in colwidths:
1798 colspec = nodes.colspec(colwidth=colwidth)
1799 if stub_columns:
1800 colspec.attributes['stub'] = True
1801 stub_columns -= 1
1802 tgroup += colspec
1803 if headrows:
1804 thead = nodes.thead()
1805 tgroup += thead
1806 for row in headrows:
1807 thead += self.build_table_row(row, tableline)
1808 tbody = nodes.tbody()
1809 tgroup += tbody
1810 for row in bodyrows:
1811 tbody += self.build_table_row(row, tableline)
1812 return table
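    # Sketch of the node structure returned by `build_table`:
    #
    #   <table>
    #       <tgroup cols="...">
    #           <colspec colwidth="...">  (one per column)
    #           <thead>                   (only if there are header rows)
    #               <row> ...
    #           <tbody>
    #               <row>
    #                   <entry>           (cell contents parsed as body
    #                                      elements, see build_table_row)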
1813
1814 def build_table_row(self, rowdata, tableline):
1815 row = nodes.row()
1816 for cell in rowdata:
1817 if cell is None:
1818 continue
1819 morerows, morecols, offset, cellblock = cell
1820 attributes = {}
1821 if morerows:
1822 attributes['morerows'] = morerows
1823 if morecols:
1824 attributes['morecols'] = morecols
1825 entry = nodes.entry(**attributes)
1826 row += entry
1827 if ''.join(cellblock):
1828 self.nested_parse(cellblock, input_offset=tableline+offset,
1829 node=entry)
1830 return row
1831
1832 explicit = Struct()
1833 """Patterns and constants used for explicit markup recognition."""
1834
1835 explicit.patterns = Struct(
1836 target=re.compile(r"""
1837 (
1838 _ # anonymous target
1839 | # *OR*
1840 (?!_) # no underscore at the beginning
1841 (?P<quote>`?) # optional open quote
1842 (?![ `]) # first char. not space or
1843 # backquote
1844 (?P<name> # reference name
1845 .+?
1846 )
1847 %(non_whitespace_escape_before)s
1848 (?P=quote) # close quote if open quote used
1849 )
1850 (?<!(?<!\x00):) # no unescaped colon at end
1851 %(non_whitespace_escape_before)s
1852 [ ]? # optional space
1853 : # end of reference name
1854 ([ ]+|$) # followed by whitespace
1855 """ % vars(Inliner), re.VERBOSE),
1856 reference=re.compile(r"""
1857 (
1858 (?P<simple>%(simplename)s)_
1859 | # *OR*
1860 ` # open backquote
1861 (?![ ]) # not space
1862 (?P<phrase>.+?) # hyperlink phrase
1863 %(non_whitespace_escape_before)s
1864 `_ # close backquote,
1865 # reference mark
1866 )
1867 $ # end of string
1868 """ % vars(Inliner), re.VERBOSE),
1869 substitution=re.compile(r"""
1870 (
1871 (?![ ]) # first char. not space
1872 (?P<name>.+?) # substitution text
1873 %(non_whitespace_escape_before)s
1874 \| # close delimiter
1875 )
1876 ([ ]+|$) # followed by whitespace
1877 """ % vars(Inliner),
1878 re.VERBOSE),)
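    # Illustrative matches for the patterns above (applied to the escaped
    # text that follows the explicit markup start):
    #
    #   target:        "_:", "name:", "`a phrase name`:"
    #   reference:     "name_", "`a phrase reference`_"
    #   substitution:  "text|"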
1879
1880 def footnote(self, match):
1881 src, srcline = self.state_machine.get_source_and_line()
1882 (indented, indent, offset, blank_finish
1883 ) = self.state_machine.get_first_known_indented(match.end())
1884 label = match.group(1)
1885 name = normalize_name(label)
1886 footnote = nodes.footnote('\n'.join(indented))
1887 footnote.source = src
1888 footnote.line = srcline
1889 if name[0] == '#': # auto-numbered
1890 name = name[1:] # autonumber label
1891 footnote['auto'] = 1
1892 if name:
1893 footnote['names'].append(name)
1894 self.document.note_autofootnote(footnote)
1895 elif name == '*': # auto-symbol
1896 name = ''
1897 footnote['auto'] = '*'
1898 self.document.note_symbol_footnote(footnote)
1899 else: # manually numbered
1900 footnote += nodes.label('', label)
1901 footnote['names'].append(name)
1902 self.document.note_footnote(footnote)
1903 if name:
1904 self.document.note_explicit_target(footnote, footnote)
1905 else:
1906 self.document.set_id(footnote, footnote)
1907 if indented:
1908 self.nested_parse(indented, input_offset=offset, node=footnote)
1909 else:
1910 footnote += self.reporter.warning('Footnote content expected.')
1911 return [footnote], blank_finish
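    # Footnote label forms recognized here (illustrative):
    #
    #   .. [1]       manually numbered
    #   .. [#]       auto-numbered
    #   .. [#label]  auto-numbered, with a reference name
    #   .. [*]       auto-symbol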
1912
1913 def citation(self, match):
1914 src, srcline = self.state_machine.get_source_and_line()
1915 (indented, indent, offset, blank_finish
1916 ) = self.state_machine.get_first_known_indented(match.end())
1917 label = match.group(1)
1918 name = normalize_name(label)
1919 citation = nodes.citation('\n'.join(indented))
1920 citation.source = src
1921 citation.line = srcline
1922 citation += nodes.label('', label)
1923 citation['names'].append(name)
1924 self.document.note_citation(citation)
1925 self.document.note_explicit_target(citation, citation)
1926 if indented:
1927 self.nested_parse(indented, input_offset=offset, node=citation)
1928 else:
1929 citation += self.reporter.warning('Citation content expected.')
1930 return [citation], blank_finish
1931
1932 def hyperlink_target(self, match):
1933 pattern = self.explicit.patterns.target
1934 lineno = self.state_machine.abs_line_number()
1935 (block, indent, offset, blank_finish
1936 ) = self.state_machine.get_first_known_indented(
1937 match.end(), until_blank=True, strip_indent=False)
1938 blocktext = match.string[:match.end()] + '\n'.join(block)
1939 block = [escape2null(line) for line in block]
1940 escaped = block[0]
1941 blockindex = 0
1942 while True:
1943 targetmatch = pattern.match(escaped)
1944 if targetmatch:
1945 break
1946 blockindex += 1
1947 try:
1948 escaped += block[blockindex]
1949 except IndexError:
1950 raise MarkupError('malformed hyperlink target.')
1951 del block[:blockindex]
1952 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
1953 target = self.make_target(block, blocktext, lineno,
1954 targetmatch.group('name'))
1955 return [target], blank_finish
1956
1957 def make_target(self, block, block_text, lineno, target_name):
1958 target_type, data = self.parse_target(block, block_text, lineno)
1959 if target_type == 'refname':
1960 target = nodes.target(block_text, '', refname=normalize_name(data))
1961 target.indirect_reference_name = data
1962 self.add_target(target_name, '', target, lineno)
1963 self.document.note_indirect_target(target)
1964 return target
1965 elif target_type == 'refuri':
1966 target = nodes.target(block_text, '')
1967 self.add_target(target_name, data, target, lineno)
1968 return target
1969 else:
1970 return data
1971
1972 def parse_target(self, block, block_text, lineno):
1973 """
1974 Determine the type of reference of a target.
1975
1976 :Return: A 2-tuple, one of:
1977
1978 - 'refname' and the indirect reference name
1979 - 'refuri' and the URI
1980 - 'malformed' and a system_message node
1981 """
1982 if block and block[-1].strip()[-1:] == '_': # possible indirect target
1983 reference = ' '.join(line.strip() for line in block)
1984 refname = self.is_reference(reference)
1985 if refname:
1986 return 'refname', refname
1987 ref_parts = split_escaped_whitespace(' '.join(block))
1988 reference = ' '.join(''.join(unescape(part).split())
1989 for part in ref_parts)
1990 return 'refuri', reference
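    # Illustrative results of `parse_target` (sketch):
    #
    #   .. _name: other-target_        ->  ('refname', 'other-target')
    #   .. _name: https://example.org  ->  ('refuri', 'https://example.org')
    #
    # A URI wrapped over several lines is rejoined with internal whitespace
    # removed; whitespace escaped with a backslash survives as a space.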
1991
1992 def is_reference(self, reference):
1993 match = self.explicit.patterns.reference.match(
1994 whitespace_normalize_name(reference))
1995 if not match:
1996 return None
1997 return unescape(match.group('simple') or match.group('phrase'))
1998
1999 def add_target(self, targetname, refuri, target, lineno):
2000 target.line = lineno
2001 if targetname:
2002 name = normalize_name(unescape(targetname))
2003 target['names'].append(name)
2004 if refuri:
2005 uri = self.inliner.adjust_uri(refuri)
2006 if uri:
2007 target['refuri'] = uri
2008 else:
2009 raise ApplicationError('problem with URI: %r' % refuri)
2010 self.document.note_explicit_target(target, self.parent)
2011 else: # anonymous target
2012 if refuri:
2013 target['refuri'] = refuri
2014 target['anonymous'] = True
2015 self.document.note_anonymous_target(target)
2016
2017 def substitution_def(self, match):
2018 pattern = self.explicit.patterns.substitution
2019 src, srcline = self.state_machine.get_source_and_line()
2020 (block, indent, offset, blank_finish
2021 ) = self.state_machine.get_first_known_indented(match.end(),
2022 strip_indent=False)
2023 blocktext = (match.string[:match.end()] + '\n'.join(block))
2024 block.disconnect()
2025 escaped = escape2null(block[0].rstrip())
2026 blockindex = 0
2027 while True:
2028 subdefmatch = pattern.match(escaped)
2029 if subdefmatch:
2030 break
2031 blockindex += 1
2032 try:
2033 escaped = escaped + ' ' + escape2null(
2034 block[blockindex].strip())
2035 except IndexError:
2036 raise MarkupError('malformed substitution definition.')
2037 del block[:blockindex] # strip out the substitution marker
2038 start = subdefmatch.end()-len(escaped)-1
2039 block[0] = (block[0].strip() + ' ')[start:-1]
2040 if not block[0]:
2041 del block[0]
2042 offset += 1
2043 while block and not block[-1].strip():
2044 block.pop()
2045 subname = subdefmatch.group('name')
2046 substitution_node = nodes.substitution_definition(blocktext)
2047 substitution_node.source = src
2048 substitution_node.line = srcline
2049 if not block:
2050 msg = self.reporter.warning(
2051 'Substitution definition "%s" missing contents.' % subname,
2052 nodes.literal_block(blocktext, blocktext),
2053 source=src, line=srcline)
2054 return [msg], blank_finish
2055 block[0] = block[0].strip()
2056 substitution_node['names'].append(
2057 nodes.whitespace_normalize_name(subname))
2058 new_abs_offset, blank_finish = self.nested_list_parse(
2059 block, input_offset=offset, node=substitution_node,
2060 initial_state='SubstitutionDef', blank_finish=blank_finish)
2061 i = 0
2062 for node in substitution_node[:]:
2063 if not (isinstance(node, nodes.Inline)
2064 or isinstance(node, nodes.Text)):
2065 self.parent += substitution_node[i]
2066 del substitution_node[i]
2067 else:
2068 i += 1
2069 for node in substitution_node.findall(nodes.Element):
2070 if self.disallowed_inside_substitution_definitions(node):
2071 pformat = nodes.literal_block('', node.pformat().rstrip())
2072 msg = self.reporter.error(
2073 'Substitution definition contains illegal element <%s>:'
2074 % node.tagname,
2075 pformat, nodes.literal_block(blocktext, blocktext),
2076 source=src, line=srcline)
2077 return [msg], blank_finish
2078 if len(substitution_node) == 0:
2079 msg = self.reporter.warning(
2080 'Substitution definition "%s" empty or invalid.' % subname,
2081 nodes.literal_block(blocktext, blocktext),
2082 source=src, line=srcline)
2083 return [msg], blank_finish
2084 self.document.note_substitution_def(
2085 substitution_node, subname, self.parent)
2086 return [substitution_node], blank_finish
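    # Illustrative substitution definition (sketch):
    #
    #   .. |logo| image:: logo.png
    #
    # yields a substitution_definition node named "logo" whose child is the
    # node produced by the embedded directive; non-inline children are moved
    # out to the parent, and the checks above reject empty or otherwise
    # invalid definitions.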
2087
2088 def disallowed_inside_substitution_definitions(self, node) -> bool:
2089 if (node['ids']
2090 or isinstance(node, nodes.reference) and node.get('anonymous')
2091 or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
2092 return True
2093 else:
2094 return False
2095
2096 def directive(self, match, **option_presets):
2097 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2098 type_name = match.group(1)
2099 directive_class, messages = directives.directive(
2100 type_name, self.memo.language, self.document)
2101 self.parent += messages
2102 if directive_class:
2103 return self.run_directive(
2104 directive_class, match, type_name, option_presets)
2105 else:
2106 return self.unknown_directive(type_name)
2107
2108 def run_directive(self, directive, match, type_name, option_presets):
2109 """
2110 Parse a directive then run its directive function.
2111
2112 Parameters:
2113
2114 - `directive`: The class implementing the directive. Must be
2115 a subclass of `rst.Directive`.
2116
2117 - `match`: A regular expression match object which matched the first
2118 line of the directive.
2119
2120 - `type_name`: The directive name, as used in the source text.
2121
2122 - `option_presets`: A dictionary of preset options, defaults for the
2123 directive options. Currently, only an "alt" option is passed by
2124 substitution definitions (value: the substitution name), which may
2125 be used by an embedded image directive.
2126
2127 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2128 """
2129 if isinstance(directive, (FunctionType, MethodType)):
2130 from docutils.parsers.rst import convert_directive_function
2131 directive = convert_directive_function(directive)
2132 lineno = self.state_machine.abs_line_number()
2133 initial_line_offset = self.state_machine.line_offset
2134 (indented, indent, line_offset, blank_finish
2135 ) = self.state_machine.get_first_known_indented(match.end(),
2136 strip_top=0)
2137 block_text = '\n'.join(self.state_machine.input_lines[
2138 initial_line_offset : self.state_machine.line_offset + 1]) # noqa: E203,E501
2139 try:
2140 arguments, options, content, content_offset = (
2141 self.parse_directive_block(indented, line_offset,
2142 directive, option_presets))
2143 except MarkupError as detail:
2144 error = self.reporter.error(
2145 'Error in "%s" directive:\n%s.' % (type_name,
2146 ' '.join(detail.args)),
2147 nodes.literal_block(block_text, block_text), line=lineno)
2148 return [error], blank_finish
2149 directive_instance = directive(
2150 type_name, arguments, options, content, lineno,
2151 content_offset, block_text, self, self.state_machine)
2152 try:
2153 result = directive_instance.run()
2154 except docutils.parsers.rst.DirectiveError as error:
2155 msg_node = self.reporter.system_message(error.level, error.msg,
2156 line=lineno)
2157 msg_node += nodes.literal_block(block_text, block_text)
2158 result = [msg_node]
2159 assert isinstance(result, list), \
2160 'Directive "%s" must return a list of nodes.' % type_name
2161 for i in range(len(result)):
2162 assert isinstance(result[i], nodes.Node), \
2163 ('Directive "%s" returned non-Node object (index %s): %r'
2164 % (type_name, i, result[i]))
2165 return (result,
2166 blank_finish or self.state_machine.is_next_line_blank())
2167
2168 def parse_directive_block(self, indented, line_offset, directive,
2169 option_presets):
2170 option_spec = directive.option_spec
2171 has_content = directive.has_content
2172 if indented and not indented[0].strip():
2173 indented.trim_start()
2174 line_offset += 1
2175 while indented and not indented[-1].strip():
2176 indented.trim_end()
2177 if indented and (directive.required_arguments
2178 or directive.optional_arguments
2179 or option_spec):
2180 for i, line in enumerate(indented):
2181 if not line.strip():
2182 break
2183 else:
2184 i += 1
2185 arg_block = indented[:i]
2186 content = indented[i+1:]
2187 content_offset = line_offset + i + 1
2188 else:
2189 content = indented
2190 content_offset = line_offset
2191 arg_block = []
2192 if option_spec:
2193 options, arg_block = self.parse_directive_options(
2194 option_presets, option_spec, arg_block)
2195 else:
2196 options = {}
2197 if arg_block and not (directive.required_arguments
2198 or directive.optional_arguments):
2199 content = arg_block + indented[i:]
2200 content_offset = line_offset
2201 arg_block = []
2202 while content and not content[0].strip():
2203 content.trim_start()
2204 content_offset += 1
2205 if directive.required_arguments or directive.optional_arguments:
2206 arguments = self.parse_directive_arguments(
2207 directive, arg_block)
2208 else:
2209 arguments = []
2210 if content and not has_content:
2211 raise MarkupError('no content permitted')
2212 return arguments, options, content, content_offset
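    # Worked example (illustrative, roughly): for a directive such as
    #
    #   .. figure:: map.png
    #      :width: 200px
    #
    #      The caption.
    #
    # the first paragraph of the indented block ("map.png", ":width: 200px")
    # becomes the argument/option block and the rest becomes the content, so
    # parsing yields arguments ['map.png'], options {'width': '200px'}, and
    # content ['The caption.'].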
2213
2214 def parse_directive_options(self, option_presets, option_spec, arg_block):
2215 options = option_presets.copy()
2216 for i, line in enumerate(arg_block):
2217 if re.match(Body.patterns['field_marker'], line):
2218 opt_block = arg_block[i:]
2219 arg_block = arg_block[:i]
2220 break
2221 else:
2222 opt_block = []
2223 if opt_block:
2224 success, data = self.parse_extension_options(option_spec,
2225 opt_block)
2226 if success: # data is a dict of options
2227 options.update(data)
2228 else: # data is an error string
2229 raise MarkupError(data)
2230 return options, arg_block
2231
2232 def parse_directive_arguments(self, directive, arg_block):
2233 required = directive.required_arguments
2234 optional = directive.optional_arguments
2235 arg_text = '\n'.join(arg_block)
2236 arguments = arg_text.split()
2237 if len(arguments) < required:
2238 raise MarkupError('%s argument(s) required, %s supplied'
2239 % (required, len(arguments)))
2240 elif len(arguments) > required + optional:
2241 if directive.final_argument_whitespace:
2242 arguments = arg_text.split(None, required + optional - 1)
2243 else:
2244 raise MarkupError(
2245 'maximum %s argument(s) allowed, %s supplied'
2246 % (required + optional, len(arguments)))
2247 return arguments
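    # Illustrative: with required=1, optional=0, and
    # final_argument_whitespace=True, the argument text "one two three" is
    # returned as ['one two three']; without final_argument_whitespace the
    # same text raises MarkupError (too many arguments).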
2248
2249 def parse_extension_options(self, option_spec, datalines):
2250 """
2251 Parse `datalines` for a field list containing extension options
2252 matching `option_spec`.
2253
2254 :Parameters:
2255 - `option_spec`: a mapping of option name to conversion
2256 function, which should raise an exception on bad input.
2257 - `datalines`: a list of input strings.
2258
2259 :Return:
2260 - Success value, 1 or 0.
2261 - An option dictionary on success, an error string on failure.
2262 """
2263 node = nodes.field_list()
2264 newline_offset, blank_finish = self.nested_list_parse(
2265 datalines, 0, node, initial_state='ExtensionOptions',
2266 blank_finish=True)
2267 if newline_offset != len(datalines): # incomplete parse of block
2268 return 0, 'invalid option block'
2269 try:
2270 options = utils.extract_extension_options(node, option_spec)
2271 except KeyError as detail:
2272 return 0, 'unknown option: "%s"' % detail.args[0]
2273 except (ValueError, TypeError) as detail:
2274 return 0, 'invalid option value: %s' % ' '.join(detail.args)
2275 except utils.ExtensionOptionError as detail:
2276 return 0, 'invalid option data: %s' % ' '.join(detail.args)
2277 if blank_finish:
2278 return 1, options
2279 else:
2280 return 0, 'option data incompletely parsed'
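    # Usage sketch (assuming the standard `directives.unchanged` converter):
    #
    #   success, data = self.parse_extension_options(
    #       {'class': directives.unchanged}, [':class: special'])
    #   # -> success == 1, data == {'class': 'special'}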
2281
2282 def unknown_directive(self, type_name):
2283 lineno = self.state_machine.abs_line_number()
2284 (indented, indent, offset, blank_finish
2285 ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
2286 text = '\n'.join(indented)
2287 error = self.reporter.error('Unknown directive type "%s".' % type_name,
2288 nodes.literal_block(text, text),
2289 line=lineno)
2290 return [error], blank_finish
2291
2292 def comment(self, match):
2293 if self.state_machine.is_next_line_blank():
2294 first_comment_line = match.string[match.end():]
2295 if not first_comment_line.strip(): # empty comment
2296 return [nodes.comment()], True # "A tiny but practical wart."
2297 if first_comment_line.startswith('end of inclusion from "'):
2298 # cf. parsers.rst.directives.misc.Include
2299 self.document.include_log.pop()
2300 return [], True
2301 (indented, indent, offset, blank_finish
2302 ) = self.state_machine.get_first_known_indented(match.end())
2303 while indented and not indented[-1].strip():
2304 indented.trim_end()
2305 text = '\n'.join(indented)
2306 return [nodes.comment(text, text)], blank_finish
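    # Illustrative: ".." on a line by itself is an "empty comment" (it only
    # terminates a preceding construct), while ".. arbitrary text" that
    # matches none of the explicit constructs below is stored verbatim in a
    # comment node.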
2307
2308 explicit.constructs = [
2309 (footnote,
2310 re.compile(r"""
2311 \.\.[ ]+ # explicit markup start
2312 \[
2313 ( # footnote label:
2314 [0-9]+ # manually numbered footnote
2315 | # *OR*
2316 \# # anonymous auto-numbered footnote
2317 | # *OR*
2318                          \#%s            # labeled auto-numbered footnote
2319 | # *OR*
2320 \* # auto-symbol footnote
2321 )
2322 \]
2323 ([ ]+|$) # whitespace or end of line
2324 """ % Inliner.simplename, re.VERBOSE)),
2325 (citation,
2326 re.compile(r"""
2327 \.\.[ ]+ # explicit markup start
2328 \[(%s)\] # citation label
2329 ([ ]+|$) # whitespace or end of line
2330 """ % Inliner.simplename, re.VERBOSE)),
2331 (hyperlink_target,
2332 re.compile(r"""
2333 \.\.[ ]+ # explicit markup start
2334 _ # target indicator
2335 (?![ ]|$) # first char. not space or EOL
2336 """, re.VERBOSE)),
2337 (substitution_def,
2338 re.compile(r"""
2339 \.\.[ ]+ # explicit markup start
2340 \| # substitution indicator
2341 (?![ ]|$) # first char. not space or EOL
2342 """, re.VERBOSE)),
2343 (directive,
2344 re.compile(r"""
2345 \.\.[ ]+ # explicit markup start
2346 (%s) # directive name
2347 [ ]? # optional space
2348 :: # directive delimiter
2349 ([ ]+|$) # whitespace or end of line
2350 """ % Inliner.simplename, re.VERBOSE))]
2351
2352 def explicit_markup(self, match, context, next_state):
2353 """Footnotes, hyperlink targets, directives, comments."""
2354 nodelist, blank_finish = self.explicit_construct(match)
2355 self.parent += nodelist
2356 self.explicit_list(blank_finish)
2357 return [], next_state, []
2358
2359 def explicit_construct(self, match):
2360 """Determine which explicit construct this is, parse & return it."""
2361 errors = []
2362 for method, pattern in self.explicit.constructs:
2363 expmatch = pattern.match(match.string)
2364 if expmatch:
2365 try:
2366 return method(self, expmatch)
2367 except MarkupError as error:
2368 lineno = self.state_machine.abs_line_number()
2369 message = ' '.join(error.args)
2370 errors.append(self.reporter.warning(message, line=lineno))
2371 break
2372 nodelist, blank_finish = self.comment(match)
2373 return nodelist + errors, blank_finish
2374
2375 def explicit_list(self, blank_finish) -> None:
2376 """
2377 Create a nested state machine for a series of explicit markup
2378 constructs (including anonymous hyperlink targets).
2379 """
2380 offset = self.state_machine.line_offset + 1 # next line
2381 newline_offset, blank_finish = self.nested_list_parse(
2382 self.state_machine.input_lines[offset:],
2383 input_offset=self.state_machine.abs_line_offset() + 1,
2384 node=self.parent, initial_state='Explicit',
2385 blank_finish=blank_finish,
2386 match_titles=self.state_machine.match_titles)
2387 self.goto_line(newline_offset)
2388 if not blank_finish:
2389 self.parent += self.unindent_warning('Explicit markup')
2390
2391 def anonymous(self, match, context, next_state):
2392 """Anonymous hyperlink targets."""
2393 nodelist, blank_finish = self.anonymous_target(match)
2394 self.parent += nodelist
2395 self.explicit_list(blank_finish)
2396 return [], next_state, []
2397
2398 def anonymous_target(self, match):
2399 lineno = self.state_machine.abs_line_number()
2400 (block, indent, offset, blank_finish
2401 ) = self.state_machine.get_first_known_indented(match.end(),
2402 until_blank=True)
2403 blocktext = match.string[:match.end()] + '\n'.join(block)
2404 block = [escape2null(line) for line in block]
2405 target = self.make_target(block, blocktext, lineno, '')
2406 return [target], blank_finish
2407
2408 def line(self, match, context, next_state):
2409 """Section title overline or transition marker."""
2410 if self.state_machine.match_titles:
2411 return [match.string], 'Line', []
2412 elif match.string.strip() == '::':
2413 raise statemachine.TransitionCorrection('text')
2414 elif len(match.string.strip()) < 4:
2415 msg = self.reporter.info(
2416 'Unexpected possible title overline or transition.\n'
2417 "Treating it as ordinary text because it's so short.",
2418 line=self.state_machine.abs_line_number())
2419 self.parent += msg
2420 raise statemachine.TransitionCorrection('text')
2421 else:
2422 blocktext = self.state_machine.line
2423 msg = self.reporter.severe(
2424 'Unexpected section title or transition.',
2425 nodes.literal_block(blocktext, blocktext),
2426 line=self.state_machine.abs_line_number())
2427 self.parent += msg
2428 return [], next_state, []
2429
2430 def text(self, match, context, next_state):
2431 """Titles, definition lists, paragraphs."""
2432 return [match.string], 'Text', []
2433
2434
2435class RFC2822Body(Body):
2436
2437 """
2438 RFC2822 headers are only valid as the first constructs in documents. As
2439 soon as anything else appears, the `Body` state should take over.
2440 """
2441
2442 patterns = Body.patterns.copy() # can't modify the original
2443 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2444 initial_transitions = [(name, 'Body')
2445 for name in Body.initial_transitions]
2446 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2447
2448 def rfc2822(self, match, context, next_state):
2449 """RFC2822-style field list item."""
2450 fieldlist = nodes.field_list(classes=['rfc2822'])
2451 self.parent += fieldlist
2452 field, blank_finish = self.rfc2822_field(match)
2453 fieldlist += field
2454 offset = self.state_machine.line_offset + 1 # next line
2455 newline_offset, blank_finish = self.nested_list_parse(
2456 self.state_machine.input_lines[offset:],
2457 input_offset=self.state_machine.abs_line_offset() + 1,
2458 node=fieldlist, initial_state='RFC2822List',
2459 blank_finish=blank_finish)
2460 self.goto_line(newline_offset)
2461 if not blank_finish:
2462 self.parent += self.unindent_warning(
2463 'RFC2822-style field list')
2464 return [], next_state, []
2465
2466 def rfc2822_field(self, match):
2467 name = match.string[:match.string.find(':')]
2468 (indented, indent, line_offset, blank_finish
2469 ) = self.state_machine.get_first_known_indented(match.end(),
2470 until_blank=True)
2471 fieldnode = nodes.field()
2472 fieldnode += nodes.field_name(name, name)
2473 fieldbody = nodes.field_body('\n'.join(indented))
2474 fieldnode += fieldbody
2475 if indented:
2476 self.nested_parse(indented, input_offset=line_offset,
2477 node=fieldbody)
2478 return fieldnode, blank_finish
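    # Illustrative input (only recognized at the very start of a document):
    #
    #   Author: Jane Doe
    #   Date: 2002-03-22
    #
    # Each header becomes a field (field_name + field_body) in a field_list
    # with class "rfc2822".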
2479
2480
2481class SpecializedBody(Body):
2482
2483 """
2484 Superclass for second and subsequent compound element members. Compound
2485 elements are lists and list-like constructs.
2486
2487 All transition methods are disabled (redefined as `invalid_input`).
2488 Override individual methods in subclasses to re-enable.
2489
2490 For example, once an initial bullet list item, say, is recognized, the
2491 `BulletList` subclass takes over, with a "bullet_list" node as its
2492 container. Upon encountering the initial bullet list item, `Body.bullet`
2493 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2494 starts up a nested parsing session with `BulletList` as the initial state.
2495 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2496 as only bullet list items are encountered, they are parsed and inserted
2497 into the container. The first construct which is *not* a bullet list item
2498 triggers the `invalid_input` method, which ends the nested parse and
2499 closes the container. `BulletList` needs to recognize input that is
2500 invalid in the context of a bullet list, which means everything *other
2501 than* bullet list items, so it inherits the transition list created in
2502 `Body`.
2503 """
2504
2505 def invalid_input(self, match=None, context=None, next_state=None):
2506 """Not a compound element member. Abort this state machine."""
2507 self.state_machine.previous_line() # back up so parent SM can reassess
2508 raise EOFError
2509
2510 indent = invalid_input
2511 bullet = invalid_input
2512 enumerator = invalid_input
2513 field_marker = invalid_input
2514 option_marker = invalid_input
2515 doctest = invalid_input
2516 line_block = invalid_input
2517 grid_table_top = invalid_input
2518 simple_table_top = invalid_input
2519 explicit_markup = invalid_input
2520 anonymous = invalid_input
2521 line = invalid_input
2522 text = invalid_input
2523
2524
2525class BulletList(SpecializedBody):
2526
2527 """Second and subsequent bullet_list list_items."""
2528
2529 def bullet(self, match, context, next_state):
2530 """Bullet list item."""
2531 if match.string[0] != self.parent['bullet']:
2532 # different bullet: new list
2533 self.invalid_input()
2534 listitem, blank_finish = self.list_item(match.end())
2535 self.parent += listitem
2536 self.blank_finish = blank_finish
2537 return [], next_state, []
2538
2539
2540class DefinitionList(SpecializedBody):
2541
2542 """Second and subsequent definition_list_items."""
2543
2544 def text(self, match, context, next_state):
2545 """Definition lists."""
2546 return [match.string], 'Definition', []
2547
2548
2549class EnumeratedList(SpecializedBody):
2550
2551 """Second and subsequent enumerated_list list_items."""
2552
2553 def enumerator(self, match, context, next_state):
2554 """Enumerated list item."""
2555 format, sequence, text, ordinal = self.parse_enumerator(
2556 match, self.parent['enumtype'])
2557 if (format != self.format
2558 or (sequence != '#' and (sequence != self.parent['enumtype']
2559 or self.auto
2560 or ordinal != (self.lastordinal + 1)))
2561 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2562 # different enumeration: new list
2563 self.invalid_input()
2564 if sequence == '#':
2565 self.auto = 1
2566 listitem, blank_finish = self.list_item(match.end())
2567 self.parent += listitem
2568 self.blank_finish = blank_finish
2569 self.lastordinal = ordinal
2570 return [], next_state, []
2571
2572
2573class FieldList(SpecializedBody):
2574
2575 """Second and subsequent field_list fields."""
2576
2577 def field_marker(self, match, context, next_state):
2578 """Field list field."""
2579 field, blank_finish = self.field(match)
2580 self.parent += field
2581 self.blank_finish = blank_finish
2582 return [], next_state, []
2583
2584
2585class OptionList(SpecializedBody):
2586
2587 """Second and subsequent option_list option_list_items."""
2588
2589 def option_marker(self, match, context, next_state):
2590 """Option list item."""
2591 try:
2592 option_list_item, blank_finish = self.option_list_item(match)
2593 except MarkupError:
2594 self.invalid_input()
2595 self.parent += option_list_item
2596 self.blank_finish = blank_finish
2597 return [], next_state, []
2598
2599
2600class RFC2822List(SpecializedBody, RFC2822Body):
2601
2602 """Second and subsequent RFC2822-style field_list fields."""
2603
2604 patterns = RFC2822Body.patterns
2605 initial_transitions = RFC2822Body.initial_transitions
2606
2607 def rfc2822(self, match, context, next_state):
2608 """RFC2822-style field list item."""
2609 field, blank_finish = self.rfc2822_field(match)
2610 self.parent += field
2611 self.blank_finish = blank_finish
2612 return [], 'RFC2822List', []
2613
2614 blank = SpecializedBody.invalid_input
2615
2616
2617class ExtensionOptions(FieldList):
2618
2619 """
2620 Parse field_list fields for extension options.
2621
2622 No nested parsing is done (including inline markup parsing).
2623 """
2624
2625 def parse_field_body(self, indented, offset, node) -> None:
2626 """Override `Body.parse_field_body` for simpler parsing."""
2627 lines = []
2628 for line in list(indented) + ['']:
2629 if line.strip():
2630 lines.append(line)
2631 elif lines:
2632 text = '\n'.join(lines)
2633 node += nodes.paragraph(text, text)
2634 lines = []
2635
2636
2637class LineBlock(SpecializedBody):
2638
2639 """Second and subsequent lines of a line_block."""
2640
2641 blank = SpecializedBody.invalid_input
2642
2643 def line_block(self, match, context, next_state):
2644 """New line of line block."""
2645 lineno = self.state_machine.abs_line_number()
2646 line, messages, blank_finish = self.line_block_line(match, lineno)
2647 self.parent += line
2648 self.parent.parent += messages
2649 self.blank_finish = blank_finish
2650 return [], next_state, []
2651
2652
2653class Explicit(SpecializedBody):
2654
2655    """Second and subsequent explicit markup constructs."""
2656
2657 def explicit_markup(self, match, context, next_state):
2658 """Footnotes, hyperlink targets, directives, comments."""
2659 nodelist, blank_finish = self.explicit_construct(match)
2660 self.parent += nodelist
2661 self.blank_finish = blank_finish
2662 return [], next_state, []
2663
2664 def anonymous(self, match, context, next_state):
2665 """Anonymous hyperlink targets."""
2666 nodelist, blank_finish = self.anonymous_target(match)
2667 self.parent += nodelist
2668 self.blank_finish = blank_finish
2669 return [], next_state, []
2670
2671 blank = SpecializedBody.invalid_input
2672
2673
2674class SubstitutionDef(Body):
2675
2676 """
2677 Parser for the contents of a substitution_definition element.
2678 """
2679
2680 patterns = {
2681 'embedded_directive': re.compile(r'(%s)::( +|$)'
2682 % Inliner.simplename),
2683 'text': r''}
2684 initial_transitions = ['embedded_directive', 'text']
2685
2686 def embedded_directive(self, match, context, next_state):
2687 nodelist, blank_finish = self.directive(match,
2688 alt=self.parent['names'][0])
2689 self.parent += nodelist
2690 if not self.state_machine.at_eof():
2691 self.blank_finish = blank_finish
2692 raise EOFError
2693
2694 def text(self, match, context, next_state):
2695 if not self.state_machine.at_eof():
2696 self.blank_finish = self.state_machine.is_next_line_blank()
2697 raise EOFError
2698
2699
2700class Text(RSTState):
2701
2702 """
2703 Classifier of second line of a text block.
2704
2705 Could be a paragraph, a definition list item, or a title.
2706 """
2707
2708 patterns = {'underline': Body.patterns['line'],
2709 'text': r''}
2710 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2711
2712 def blank(self, match, context, next_state):
2713 """End of paragraph."""
2714 # NOTE: self.paragraph returns [node, system_message(s)], literalnext
2715 paragraph, literalnext = self.paragraph(
2716 context, self.state_machine.abs_line_number() - 1)
2717 self.parent += paragraph
2718 if literalnext:
2719 self.parent += self.literal_block()
2720 return [], 'Body', []
2721
2722 def eof(self, context):
2723 if context:
2724 self.blank(None, context, None)
2725 return []
2726
2727 def indent(self, match, context, next_state):
2728 """Definition list item."""
2729 dl = nodes.definition_list()
2730 # the definition list starts on the line before the indent:
2731 lineno = self.state_machine.abs_line_number() - 1
2732 dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
2733 dl_item, blank_finish = self.definition_list_item(context)
2734 dl += dl_item
2735 self.parent += dl
2736 offset = self.state_machine.line_offset + 1 # next line
2737 newline_offset, blank_finish = self.nested_list_parse(
2738 self.state_machine.input_lines[offset:],
2739 input_offset=self.state_machine.abs_line_offset() + 1,
2740 node=dl, initial_state='DefinitionList',
2741 blank_finish=blank_finish, blank_finish_state='Definition')
2742 self.goto_line(newline_offset)
2743 if not blank_finish:
2744 self.parent += self.unindent_warning('Definition list')
2745 return [], 'Body', []
2746
2747 def underline(self, match, context, next_state):
2748 """Section title."""
2749 lineno = self.state_machine.abs_line_number()
2750 title = context[0].rstrip()
2751 underline = match.string.rstrip()
2752 source = title + '\n' + underline
2753 messages = []
2754 if column_width(title) > len(underline):
2755 if len(underline) < 4:
2756 if self.state_machine.match_titles:
2757 msg = self.reporter.info(
2758 'Possible title underline, too short for the title.\n'
2759 "Treating it as ordinary text because it's so short.",
2760 line=lineno)
2761 self.parent += msg
2762 raise statemachine.TransitionCorrection('text')
2763 else:
2764 blocktext = context[0] + '\n' + self.state_machine.line
2765 msg = self.reporter.warning(
2766 'Title underline too short.',
2767 nodes.literal_block(blocktext, blocktext),
2768 line=lineno)
2769 messages.append(msg)
2770 if not self.state_machine.match_titles:
2771 blocktext = context[0] + '\n' + self.state_machine.line
2772 # We need get_source_and_line() here to report correctly
2773 src, srcline = self.state_machine.get_source_and_line()
2774 # TODO: why is abs_line_number() == srcline+1
2775 # if the error is in a table (try with test_tables.py)?
2776 # print("get_source_and_line", srcline)
2777 # print("abs_line_number", self.state_machine.abs_line_number())
2778 msg = self.reporter.severe(
2779 'Unexpected section title.',
2780 nodes.literal_block(blocktext, blocktext),
2781 source=src, line=srcline)
2782 self.parent += messages
2783 self.parent += msg
2784 return [], next_state, []
2785 style = underline[0]
2786 context[:] = []
2787 self.section(title, source, style, lineno - 1, messages)
2788 return [], next_state, []
2789
2790 def text(self, match, context, next_state):
2791 """Paragraph."""
2792 startline = self.state_machine.abs_line_number() - 1
2793 msg = None
2794 try:
2795 block = self.state_machine.get_text_block(flush_left=True)
2796 except statemachine.UnexpectedIndentationError as err:
2797 block, src, srcline = err.args
2798 msg = self.reporter.error('Unexpected indentation.',
2799 source=src, line=srcline)
2800 lines = context + list(block)
2801 paragraph, literalnext = self.paragraph(lines, startline)
2802 self.parent += paragraph
2803 self.parent += msg
2804 if literalnext:
2805 try:
2806 self.state_machine.next_line()
2807 except EOFError:
2808 pass
2809 self.parent += self.literal_block()
2810 return [], next_state, []
2811
2812 def literal_block(self):
2813 """Return a list of nodes."""
2814 (indented, indent, offset, blank_finish
2815 ) = self.state_machine.get_indented()
2816 while indented and not indented[-1].strip():
2817 indented.trim_end()
2818 if not indented:
2819 return self.quoted_literal_block()
2820 data = '\n'.join(indented)
2821 literal_block = nodes.literal_block(data, data)
2822 (literal_block.source,
2823 literal_block.line) = self.state_machine.get_source_and_line(offset+1)
2824 nodelist = [literal_block]
2825 if not blank_finish:
2826 nodelist.append(self.unindent_warning('Literal block'))
2827 return nodelist
2828
2829 def quoted_literal_block(self):
2830 abs_line_offset = self.state_machine.abs_line_offset()
2831 offset = self.state_machine.line_offset
2832 parent_node = nodes.Element()
2833 new_abs_offset = self.nested_parse(
2834 self.state_machine.input_lines[offset:],
2835 input_offset=abs_line_offset, node=parent_node, match_titles=False,
2836 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2837 'initial_state': 'QuotedLiteralBlock'})
2838 self.goto_line(new_abs_offset)
2839 return parent_node.children
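    # Illustrative quoted (unindented) literal block:
    #
    #   ::
    #
    #   > line one of the literal block
    #   > line two
    #
    # The quote character is fixed by the first line; an inconsistent quote
    # or unexpected indentation ends the block with an error message.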
2840
2841 def definition_list_item(self, termline):
2842 # the parser is already on the second (indented) line:
2843 dd_lineno = self.state_machine.abs_line_number()
2844 dt_lineno = dd_lineno - 1
2845 (indented, indent, line_offset, blank_finish
2846 ) = self.state_machine.get_indented()
2847 dl_item = nodes.definition_list_item(
2848 '\n'.join(termline + list(indented)))
2849 (dl_item.source,
2850 dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
2851 dt_nodes, messages = self.term(termline, dt_lineno)
2852 dl_item += dt_nodes
2853 dd = nodes.definition('', *messages)
2854 dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
2855 dl_item += dd
2856 if termline[0][-2:] == '::':
2857 dd += self.reporter.info(
2858 'Blank line missing before literal block (after the "::")? '
2859 'Interpreted as a definition list item.',
2860 line=dd_lineno)
2861 # TODO: drop a definition if it is an empty comment to allow
2862 # definition list items with several terms?
2863 # https://sourceforge.net/p/docutils/feature-requests/60/
2864 self.nested_parse(indented, input_offset=line_offset, node=dd)
2865 return dl_item, blank_finish
2866
2867 classifier_delimiter = re.compile(' +: +')
2868
2869 def term(self, lines, lineno):
2870 """Return a definition_list's term and optional classifiers."""
2871 assert len(lines) == 1
2872 text_nodes, messages = self.inline_text(lines[0], lineno)
2873 dt = nodes.term(lines[0])
2874 dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
2875 node_list = [dt]
2876 for i in range(len(text_nodes)):
2877 node = text_nodes[i]
2878 if isinstance(node, nodes.Text):
2879 parts = self.classifier_delimiter.split(node)
2880 if len(parts) == 1:
2881 node_list[-1] += node
2882 else:
2883 text = parts[0].rstrip()
2884 textnode = nodes.Text(text)
2885 node_list[-1] += textnode
2886 node_list += [nodes.classifier(unescape(part, True), part)
2887 for part in parts[1:]]
2888 else:
2889 node_list[-1] += node
2890 return node_list, messages
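    # Illustrative: the term line "gravity : physics : Newton" is split at
    # the " : " delimiters into the term "gravity" plus classifier elements
    # "physics" and "Newton"; a colon without surrounding spaces is left
    # alone.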
2891
2892
2893class SpecializedText(Text):
2894
2895 """
2896 Superclass for second and subsequent lines of Text-variants.
2897
2898 All transition methods are disabled. Override individual methods in
2899 subclasses to re-enable.
2900 """
2901
2902 def eof(self, context):
2903 """Incomplete construct."""
2904 return []
2905
2906 def invalid_input(self, match=None, context=None, next_state=None):
2907 """Not a compound element member. Abort this state machine."""
2908 raise EOFError
2909
2910 blank = invalid_input
2911 indent = invalid_input
2912 underline = invalid_input
2913 text = invalid_input
2914
2915
2916class Definition(SpecializedText):
2917
2918 """Second line of potential definition_list_item."""
2919
2920 def eof(self, context):
2921 """Not a definition."""
2922 self.state_machine.previous_line(2) # so parent SM can reassess
2923 return []
2924
2925 def indent(self, match, context, next_state):
2926 """Definition list item."""
2927 dl_item, blank_finish = self.definition_list_item(context)
2928 self.parent += dl_item
2929 self.blank_finish = blank_finish
2930 return [], 'DefinitionList', []
2931
2932
2933class Line(SpecializedText):
2934
2935 """
2936 Second line of over- & underlined section title or transition marker.
2937 """
2938
2939 eofcheck = 1 # ignored, will be removed in Docutils 2.0.
2940
2941 def eof(self, context):
2942 """Transition marker at end of section or document."""
2943 marker = context[0].strip()
2944 if len(marker) < 4:
2945 self.state_correction(context)
2946 src, srcline = self.state_machine.get_source_and_line()
2947 # lineno = self.state_machine.abs_line_number() - 1
2948 transition = nodes.transition(rawsource=context[0])
2949 transition.source = src
2950 transition.line = srcline - 1
2951 # transition.line = lineno
2952 self.parent += transition
2953 return []
2954
2955 def blank(self, match, context, next_state):
2956 """Transition marker."""
2957 src, srcline = self.state_machine.get_source_and_line()
2958 marker = context[0].strip()
2959 if len(marker) < 4:
2960 self.state_correction(context)
2961 transition = nodes.transition(rawsource=marker)
2962 transition.source = src
2963 transition.line = srcline - 1
2964 self.parent += transition
2965 return [], 'Body', []
2966
2967 def text(self, match, context, next_state):
2968 """Potential over- & underlined title."""
2969 lineno = self.state_machine.abs_line_number() - 1
2970 overline = context[0]
2971 title = match.string
2972 underline = ''
2973 try:
2974 underline = self.state_machine.next_line()
2975 except EOFError:
2976 blocktext = overline + '\n' + title
2977 if len(overline.rstrip()) < 4:
2978 self.short_overline(context, blocktext, lineno, 2)
2979 else:
2980 msg = self.reporter.severe(
2981 'Incomplete section title.',
2982 nodes.literal_block(blocktext, blocktext),
2983 line=lineno)
2984 self.parent += msg
2985 return [], 'Body', []
2986 source = '%s\n%s\n%s' % (overline, title, underline)
2987 overline = overline.rstrip()
2988 underline = underline.rstrip()
2989 if not self.transitions['underline'][0].match(underline):
2990 blocktext = overline + '\n' + title + '\n' + underline
2991 if len(overline.rstrip()) < 4:
2992 self.short_overline(context, blocktext, lineno, 2)
2993 else:
2994 msg = self.reporter.severe(
2995 'Missing matching underline for section title overline.',
2996 nodes.literal_block(source, source),
2997 line=lineno)
2998 self.parent += msg
2999 return [], 'Body', []
3000 elif overline != underline:
3001 blocktext = overline + '\n' + title + '\n' + underline
3002 if len(overline.rstrip()) < 4:
3003 self.short_overline(context, blocktext, lineno, 2)
3004 else:
3005 msg = self.reporter.severe(
3006 'Title overline & underline mismatch.',
3007 nodes.literal_block(source, source),
3008 line=lineno)
3009 self.parent += msg
3010 return [], 'Body', []
3011 title = title.rstrip()
3012 messages = []
3013 if column_width(title) > len(overline):
3014 blocktext = overline + '\n' + title + '\n' + underline
3015 if len(overline.rstrip()) < 4:
3016 self.short_overline(context, blocktext, lineno, 2)
3017 else:
3018 msg = self.reporter.warning(
3019 'Title overline too short.',
3020 nodes.literal_block(source, source),
3021 line=lineno)
3022 messages.append(msg)
3023 style = (overline[0], underline[0])
3024 self.section(title.lstrip(), source, style, lineno + 1, messages)
3025 return [], 'Body', []
3026
3027 indent = text # indented title
3028
3029 def underline(self, match, context, next_state):
3030 overline = context[0]
3031 blocktext = overline + '\n' + self.state_machine.line
3032 lineno = self.state_machine.abs_line_number() - 1
3033 if len(overline.rstrip()) < 4:
3034 self.short_overline(context, blocktext, lineno, 1)
3035 msg = self.reporter.error(
3036 'Invalid section title or transition marker.',
3037 nodes.literal_block(blocktext, blocktext),
3038 line=lineno)
3039 self.parent += msg
3040 return [], 'Body', []
3041
3042 def short_overline(self, context, blocktext, lineno, lines=1) -> None:
3043 msg = self.reporter.info(
3044 'Possible incomplete section title.\nTreating the overline as '
3045 "ordinary text because it's so short.",
3046 line=lineno)
3047 self.parent += msg
3048 self.state_correction(context, lines)
3049
3050 def state_correction(self, context, lines=1):
3051 self.state_machine.previous_line(lines)
3052 context[:] = []
3053 raise statemachine.StateCorrection('Body', 'text')
3054
3055
3056class QuotedLiteralBlock(RSTState):
3057
3058 """
3059 Nested parse handler for quoted (unindented) literal blocks.
3060
3061 Special-purpose. Not for inclusion in `state_classes`.
3062 """
3063
3064 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
3065 'text': r''}
3066 initial_transitions = ('initial_quoted', 'text')
3067
3068 def __init__(self, state_machine, debug=False) -> None:
3069 RSTState.__init__(self, state_machine, debug)
3070 self.messages = []
3071 self.initial_lineno = None
3072
3073 def blank(self, match, context, next_state):
3074 if context:
3075 raise EOFError
3076 else:
3077 return context, next_state, []
3078
3079 def eof(self, context):
3080 if context:
3081 src, srcline = self.state_machine.get_source_and_line(
3082 self.initial_lineno)
3083 text = '\n'.join(context)
3084 literal_block = nodes.literal_block(text, text)
3085 literal_block.source = src
3086 literal_block.line = srcline
3087 self.parent += literal_block
3088 else:
3089 self.parent += self.reporter.warning(
3090 'Literal block expected; none found.',
3091 line=self.state_machine.abs_line_number()
3092 ) # src not available, statemachine.input_lines is empty
3093 self.state_machine.previous_line()
3094 self.parent += self.messages
3095 return []
3096
3097 def indent(self, match, context, next_state):
3098 assert context, ('QuotedLiteralBlock.indent: context should not '
3099 'be empty!')
3100 self.messages.append(
3101 self.reporter.error('Unexpected indentation.',
3102 line=self.state_machine.abs_line_number()))
3103 self.state_machine.previous_line()
3104 raise EOFError
3105
3106 def initial_quoted(self, match, context, next_state):
3107 """Match arbitrary quote character on the first line only."""
3108 self.remove_transition('initial_quoted')
3109 quote = match.string[0]
3110 pattern = re.compile(re.escape(quote))
3111 # New transition matches consistent quotes only:
3112 self.add_transition('quoted',
3113 (pattern, self.quoted, self.__class__.__name__))
3114 self.initial_lineno = self.state_machine.abs_line_number()
3115 return [match.string], next_state, []
3116
3117 def quoted(self, match, context, next_state):
3118 """Match consistent quotes on subsequent lines."""
3119 context.append(match.string)
3120 return context, next_state, []
3121
3122 def text(self, match, context, next_state):
3123 if context:
3124 self.messages.append(
3125 self.reporter.error('Inconsistent literal block quoting.',
3126 line=self.state_machine.abs_line_number()))
3127 self.state_machine.previous_line()
3128 raise EOFError
3129
3130
3131state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
3132 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
3133 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
3134"""Standard set of State classes used to start `RSTStateMachine`."""