1# $Id$ 
    2# Author: David Goodger <goodger@python.org> 
    3# Copyright: This module has been placed in the public domain. 
    4 
    5""" 
    6This is the ``docutils.parsers.rst.states`` module, the core of 
    7the reStructuredText parser.  It defines the following: 
    8 
    9:Classes: 
    10    - `RSTStateMachine`: reStructuredText parser's entry point. 
    11    - `NestedStateMachine`: recursive StateMachine. 
    12    - `RSTState`: reStructuredText State superclass. 
    13    - `Inliner`: For parsing inline markup. 
    14    - `Body`: Generic classifier of the first line of a block. 
    15    - `SpecializedBody`: Superclass for compound element members. 
    - `BulletList`: Second and subsequent bullet_list list_items.
    17    - `DefinitionList`: Second+ definition_list_items. 
    18    - `EnumeratedList`: Second+ enumerated_list list_items. 
    19    - `FieldList`: Second+ fields. 
    20    - `OptionList`: Second+ option_list_items. 
    21    - `RFC2822List`: Second+ RFC2822-style fields. 
    22    - `ExtensionOptions`: Parses directive option fields. 
    23    - `Explicit`: Second+ explicit markup constructs. 
    24    - `SubstitutionDef`: For embedded directives in substitution definitions. 
    25    - `Text`: Classifier of second line of a text block. 
    26    - `SpecializedText`: Superclass for continuation lines of Text-variants. 
    27    - `Definition`: Second line of potential definition_list_item. 
    28    - `Line`: Second line of overlined section title or transition marker. 
    29    - `Struct`: An auxiliary collection class. 
    30 
    31:Exception classes: 
    32    - `MarkupError` 
    33    - `ParserError` 
    34    - `MarkupMismatch` 
    35 
    36:Functions: 
    37    - `escape2null()`: Return a string, escape-backslashes converted to nulls. 
    38    - `unescape()`: Return a string, nulls removed or restored to backslashes. 
    39 
    40:Attributes: 
    41    - `state_classes`: set of State classes used with `RSTStateMachine`. 
    42 
    43Parser Overview 
    44=============== 
    45 
    46The reStructuredText parser is implemented as a recursive state machine, 
    47examining its input one line at a time.  To understand how the parser works, 
    48please first become familiar with the `docutils.statemachine` module.  In the 
    49description below, references are made to classes defined in this module; 
    50please see the individual classes for details. 
    51 
    52Parsing proceeds as follows: 
    53 
    541. The state machine examines each line of input, checking each of the 
    55   transition patterns of the state `Body`, in order, looking for a match. 
    56   The implicit transitions (blank lines and indentation) are checked before 
    57   any others.  The 'text' transition is a catch-all (matches anything). 
    58 
    592. The method associated with the matched transition pattern is called. 
    60 
    61   A. Some transition methods are self-contained, appending elements to the 
    62      document tree (`Body.doctest` parses a doctest block).  The parser's 
    63      current line index is advanced to the end of the element, and parsing 
    64      continues with step 1. 
    65 
    66   B. Other transition methods trigger the creation of a nested state machine, 
    67      whose job is to parse a compound construct ('indent' does a block quote, 
    68      'bullet' does a bullet list, 'overline' does a section [first checking 
    69      for a valid section header], etc.). 
    70 
    71      - In the case of lists and explicit markup, a one-off state machine is 
    72        created and run to parse contents of the first item. 
    73 
    74      - A new state machine is created and its initial state is set to the 
    75        appropriate specialized state (`BulletList` in the case of the 
    76        'bullet' transition; see `SpecializedBody` for more detail).  This 
    77        state machine is run to parse the compound element (or series of 
    78        explicit markup elements), and returns as soon as a non-member element 
    79        is encountered.  For example, the `BulletList` state machine ends as 
    80        soon as it encounters an element which is not a list item of that 
    81        bullet list.  The optional omission of inter-element blank lines is 
    82        enabled by this nested state machine. 
    83 
    84      - The current line index is advanced to the end of the elements parsed, 
    85        and parsing continues with step 1. 
    86 
    87   C. The result of the 'text' transition depends on the next line of text. 
    88      The current state is changed to `Text`, under which the second line is 
    89      examined.  If the second line is: 
    90 
    91      - Indented: The element is a definition list item, and parsing proceeds 
    92        similarly to step 2.B, using the `DefinitionList` state. 
    93 
    94      - A line of uniform punctuation characters: The element is a section 
    95        header; again, parsing proceeds as in step 2.B, and `Body` is still 
    96        used. 
    97 
    98      - Anything else: The element is a paragraph, which is examined for 
    99        inline markup and appended to the parent element.  Processing 
    100        continues with step 1. 
    101""" 
    102 
    103__docformat__ = 'reStructuredText' 
    104 
    105 
    106import re 
    107from types import FunctionType, MethodType 
    108 
    109from docutils import nodes, statemachine, utils 
    110from docutils import ApplicationError, DataError 
    111from docutils.statemachine import StateMachineWS, StateWS 
    112from docutils.nodes import fully_normalize_name as normalize_name 
    113from docutils.nodes import unescape, whitespace_normalize_name 
    114import docutils.parsers.rst 
    115from docutils.parsers.rst import directives, languages, tableparser, roles 
    116from docutils.utils import escape2null, column_width 
    117from docutils.utils import punctuation_chars, roman, urischemes 
    118from docutils.utils import split_escaped_whitespace 
    119 
    120 
# Exception types used by this module.  None of them adds any behaviour of
# its own (every body is just ``pass``); the class alone distinguishes the
# error.  The first three derive from `DataError`, `ParserError` derives
# from `ApplicationError`, and `MarkupMismatch` derives directly from the
# built-in `Exception`.
class MarkupError(DataError): pass
class UnknownInterpretedRoleError(DataError): pass
class InterpretedRoleNotImplementedError(DataError): pass
class ParserError(ApplicationError): pass
class MarkupMismatch(Exception): pass
    126 
    127 
    128class Struct: 
    129 
    130    """Stores data attributes for dotted-attribute access.""" 
    131 
    132    def __init__(self, **keywordargs): 
    133        self.__dict__.update(keywordargs) 
    134 
    135 
    136class RSTStateMachine(StateMachineWS): 
    137 
    138    """ 
    139    reStructuredText's master StateMachine. 
    140 
    141    The entry point to reStructuredText parsing is the `run()` method. 
    142    """ 
    143 
    144    def run(self, input_lines, document, input_offset=0, match_titles=True, 
    145            inliner=None): 
    146        """ 
    147        Parse `input_lines` and modify the `document` node in place. 
    148 
    149        Extend `StateMachineWS.run()`: set up parse-global data and 
    150        run the StateMachine. 
    151        """ 
    152        self.language = languages.get_language( 
    153            document.settings.language_code, document.reporter) 
    154        self.match_titles = match_titles 
    155        if inliner is None: 
    156            inliner = Inliner() 
    157        inliner.init_customizations(document.settings) 
    158        self.memo = Struct(document=document, 
    159                           reporter=document.reporter, 
    160                           language=self.language, 
    161                           title_styles=[], 
    162                           section_level=0, 
    163                           section_bubble_up_kludge=False, 
    164                           inliner=inliner) 
    165        self.document = document 
    166        self.attach_observer(document.note_source) 
    167        self.reporter = self.memo.reporter 
    168        self.node = document 
    169        results = StateMachineWS.run(self, input_lines, input_offset, 
    170                                     input_source=document['source']) 
    171        assert results == [], 'RSTStateMachine.run() results should be empty!' 
    172        self.node = self.memo = None    # remove unneeded references 
    173 
    174 
    175class NestedStateMachine(StateMachineWS): 
    176 
    177    """ 
    178    StateMachine run from within other StateMachine runs, to parse nested 
    179    document structures. 
    180    """ 
    181 
    182    def run(self, input_lines, input_offset, memo, node, match_titles=True): 
    183        """ 
    184        Parse `input_lines` and populate a `docutils.nodes.document` instance. 
    185 
    186        Extend `StateMachineWS.run()`: set up document-wide data. 
    187        """ 
    188        self.match_titles = match_titles 
    189        self.memo = memo 
    190        self.document = memo.document 
    191        self.attach_observer(self.document.note_source) 
    192        self.reporter = memo.reporter 
    193        self.language = memo.language 
    194        self.node = node 
    195        results = StateMachineWS.run(self, input_lines, input_offset) 
    196        assert results == [], ('NestedStateMachine.run() results should be ' 
    197                               'empty!') 
    198        return results 
    199 
    200 
    201class RSTState(StateWS): 
    202 
    203    """ 
    204    reStructuredText State superclass. 
    205 
    206    Contains methods used by all State subclasses. 
    207    """ 
    208 
    209    nested_sm = NestedStateMachine 
    210    nested_sm_cache = [] 
    211 
    212    def __init__(self, state_machine, debug=False): 
    213        self.nested_sm_kwargs = {'state_classes': state_classes, 
    214                                 'initial_state': 'Body'} 
    215        StateWS.__init__(self, state_machine, debug) 
    216 
    217    def runtime_init(self): 
    218        StateWS.runtime_init(self) 
    219        memo = self.state_machine.memo 
    220        self.memo = memo 
    221        self.reporter = memo.reporter 
    222        self.inliner = memo.inliner 
    223        self.document = memo.document 
    224        self.parent = self.state_machine.node 
    225        # enable the reporter to determine source and source-line 
    226        if not hasattr(self.reporter, 'get_source_and_line'): 
    227            self.reporter.get_source_and_line = self.state_machine.get_source_and_line  # noqa:E501 
    228 
    229    def goto_line(self, abs_line_offset): 
    230        """ 
    231        Jump to input line `abs_line_offset`, ignoring jumps past the end. 
    232        """ 
    233        try: 
    234            self.state_machine.goto_line(abs_line_offset) 
    235        except EOFError: 
    236            pass 
    237 
    238    def no_match(self, context, transitions): 
    239        """ 
    240        Override `StateWS.no_match` to generate a system message. 
    241 
    242        This code should never be run. 
    243        """ 
    244        self.reporter.severe( 
    245            'Internal error: no transition pattern match.  State: "%s"; ' 
    246            'transitions: %s; context: %s; current line: %r.' 
    247            % (self.__class__.__name__, transitions, context, 
    248               self.state_machine.line)) 
    249        return context, None, [] 
    250 
    251    def bof(self, context): 
    252        """Called at beginning of file.""" 
    253        return [], [] 
    254 
    255    def nested_parse(self, block, input_offset, node, match_titles=False, 
    256                     state_machine_class=None, state_machine_kwargs=None): 
    257        """ 
    258        Create a new StateMachine rooted at `node` and run it over the input 
    259        `block`. 
    260        """ 
    261        use_default = 0 
    262        if state_machine_class is None: 
    263            state_machine_class = self.nested_sm 
    264            use_default += 1 
    265        if state_machine_kwargs is None: 
    266            state_machine_kwargs = self.nested_sm_kwargs 
    267            use_default += 1 
    268        block_length = len(block) 
    269 
    270        state_machine = None 
    271        if use_default == 2: 
    272            try: 
    273                state_machine = self.nested_sm_cache.pop() 
    274            except IndexError: 
    275                pass 
    276        if not state_machine: 
    277            state_machine = state_machine_class(debug=self.debug, 
    278                                                **state_machine_kwargs) 
    279        state_machine.run(block, input_offset, memo=self.memo, 
    280                          node=node, match_titles=match_titles) 
    281        if use_default == 2: 
    282            self.nested_sm_cache.append(state_machine) 
    283        else: 
    284            state_machine.unlink() 
    285        new_offset = state_machine.abs_line_offset() 
    286        # No `block.parent` implies disconnected -- lines aren't in sync: 
    287        if block.parent and (len(block) - block_length) != 0: 
    288            # Adjustment for block if modified in nested parse: 
    289            self.state_machine.next_line(len(block) - block_length) 
    290        return new_offset 
    291 
    292    def nested_list_parse(self, block, input_offset, node, initial_state, 
    293                          blank_finish, 
    294                          blank_finish_state=None, 
    295                          extra_settings={}, 
    296                          match_titles=False, 
    297                          state_machine_class=None, 
    298                          state_machine_kwargs=None): 
    299        """ 
    300        Create a new StateMachine rooted at `node` and run it over the input 
    301        `block`. Also keep track of optional intermediate blank lines and the 
    302        required final one. 
    303        """ 
    304        if state_machine_class is None: 
    305            state_machine_class = self.nested_sm 
    306        if state_machine_kwargs is None: 
    307            state_machine_kwargs = self.nested_sm_kwargs.copy() 
    308        state_machine_kwargs['initial_state'] = initial_state 
    309        state_machine = state_machine_class(debug=self.debug, 
    310                                            **state_machine_kwargs) 
    311        if blank_finish_state is None: 
    312            blank_finish_state = initial_state 
    313        state_machine.states[blank_finish_state].blank_finish = blank_finish 
    314        for key, value in extra_settings.items(): 
    315            setattr(state_machine.states[initial_state], key, value) 
    316        state_machine.run(block, input_offset, memo=self.memo, 
    317                          node=node, match_titles=match_titles) 
    318        blank_finish = state_machine.states[blank_finish_state].blank_finish 
    319        state_machine.unlink() 
    320        return state_machine.abs_line_offset(), blank_finish 
    321 
    322    def section(self, title, source, style, lineno, messages): 
    323        """Check for a valid subsection and create one if it checks out.""" 
    324        if self.check_subsection(source, style, lineno): 
    325            self.new_subsection(title, lineno, messages) 
    326 
    327    def check_subsection(self, source, style, lineno): 
    328        """ 
    329        Check for a valid subsection header.  Return True or False. 
    330 
    331        When a new section is reached that isn't a subsection of the current 
    332        section, back up the line count (use ``previous_line(-x)``), then 
    333        ``raise EOFError``.  The current StateMachine will finish, then the 
    334        calling StateMachine can re-examine the title.  This will work its way 
    335        back up the calling chain until the correct section level isreached. 
    336 
    337        @@@ Alternative: Evaluate the title, store the title info & level, and 
    338        back up the chain until that level is reached.  Store in memo? Or 
    339        return in results? 
    340 
    341        :Exception: `EOFError` when a sibling or supersection encountered. 
    342        """ 
    343        memo = self.memo 
    344        title_styles = memo.title_styles 
    345        mylevel = memo.section_level 
    346        try:                            # check for existing title style 
    347            level = title_styles.index(style) + 1 
    348        except ValueError:              # new title style 
    349            if len(title_styles) == memo.section_level:  # new subsection 
    350                title_styles.append(style) 
    351                return True 
    352            else:                       # not at lowest level 
    353                self.parent += self.title_inconsistent(source, lineno) 
    354                return False 
    355        if level <= mylevel:            # sibling or supersection 
    356            memo.section_level = level   # bubble up to parent section 
    357            if len(style) == 2: 
    358                memo.section_bubble_up_kludge = True 
    359            # back up 2 lines for underline title, 3 for overline title 
    360            self.state_machine.previous_line(len(style) + 1) 
    361            raise EOFError              # let parent section re-evaluate 
    362        if level == mylevel + 1:        # immediate subsection 
    363            return True 
    364        else:                           # invalid subsection 
    365            self.parent += self.title_inconsistent(source, lineno) 
    366            return False 
    367 
    368    def title_inconsistent(self, sourcetext, lineno): 
    369        error = self.reporter.severe( 
    370            'Title level inconsistent:', nodes.literal_block('', sourcetext), 
    371            line=lineno) 
    372        return error 
    373 
    374    def new_subsection(self, title, lineno, messages): 
    375        """Append new subsection to document tree. On return, check level.""" 
    376        memo = self.memo 
    377        mylevel = memo.section_level 
    378        memo.section_level += 1 
    379        section_node = nodes.section() 
    380        self.parent += section_node 
    381        textnodes, title_messages = self.inline_text(title, lineno) 
    382        titlenode = nodes.title(title, '', *textnodes) 
    383        name = normalize_name(titlenode.astext()) 
    384        section_node['names'].append(name) 
    385        section_node += titlenode 
    386        section_node += messages 
    387        section_node += title_messages 
    388        self.document.note_implicit_target(section_node, section_node) 
    389        offset = self.state_machine.line_offset + 1 
    390        absoffset = self.state_machine.abs_line_offset() + 1 
    391        newabsoffset = self.nested_parse( 
    392              self.state_machine.input_lines[offset:], input_offset=absoffset, 
    393              node=section_node, match_titles=True) 
    394        self.goto_line(newabsoffset) 
    395        if memo.section_level <= mylevel:  # can't handle next section? 
    396            raise EOFError                 # bubble up to supersection 
    397        # reset section_level; next pass will detect it properly 
    398        memo.section_level = mylevel 
    399 
    400    def paragraph(self, lines, lineno): 
    401        """ 
    402        Return a list (paragraph & messages) & a boolean: literal_block next? 
    403        """ 
    404        data = '\n'.join(lines).rstrip() 
    405        if re.search(r'(?<!\\)(\\\\)*::$', data): 
    406            if len(data) == 2: 
    407                return [], 1 
    408            elif data[-3] in ' \n': 
    409                text = data[:-3].rstrip() 
    410            else: 
    411                text = data[:-1] 
    412            literalnext = 1 
    413        else: 
    414            text = data 
    415            literalnext = 0 
    416        textnodes, messages = self.inline_text(text, lineno) 
    417        p = nodes.paragraph(data, '', *textnodes) 
    418        p.source, p.line = self.state_machine.get_source_and_line(lineno) 
    419        return [p] + messages, literalnext 
    420 
    421    def inline_text(self, text, lineno): 
    422        """ 
    423        Return 2 lists: nodes (text and inline elements), and system_messages. 
    424        """ 
    425        nodes, messages = self.inliner.parse(text, lineno, 
    426                                             self.memo, self.parent) 
    427        return nodes, messages 
    428 
    429    def unindent_warning(self, node_name): 
    430        # the actual problem is one line below the current line 
    431        lineno = self.state_machine.abs_line_number() + 1 
    432        return self.reporter.warning('%s ends without a blank line; ' 
    433                                     'unexpected unindent.' % node_name, 
    434                                     line=lineno) 
    435 
    436 
    437def build_regexp(definition, compile=True): 
    438    """ 
    439    Build, compile and return a regular expression based on `definition`. 
    440 
    441    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts), 
    442        where "parts" is a list of regular expressions and/or regular 
    443        expression definitions to be joined into an or-group. 
    444    """ 
    445    name, prefix, suffix, parts = definition 
    446    part_strings = [] 
    447    for part in parts: 
    448        if isinstance(part, tuple): 
    449            part_strings.append(build_regexp(part, None)) 
    450        else: 
    451            part_strings.append(part) 
    452    or_group = '|'.join(part_strings) 
    453    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals() 
    454    if compile: 
    455        return re.compile(regexp) 
    456    else: 
    457        return regexp 
    458 
    459 
    460class Inliner: 
    461 
    462    """ 
    463    Parse inline markup; call the `parse()` method. 
    464    """ 
    465 
    466    def __init__(self): 
    467        self.implicit_dispatch = [] 
    468        """List of (pattern, bound method) tuples, used by 
    469        `self.implicit_inline`.""" 
    470 
    def init_customizations(self, settings):
        """
        Build the inline-markup patterns according to `settings`.

        Sets ``self.start_string_prefix``, ``self.end_string_suffix``,
        ``self.parts`` and ``self.patterns`` (a `Struct` of compiled regular
        expressions), and registers the implicit-markup handlers in
        ``self.implicit_dispatch``.

        :Parameter: `settings`: must provide ``pep_references`` and
            ``rfc_references``; ``character_level_inline_markup`` is
            optional and treated as false when missing.

        The regular-expression fragments defined as class attributes are
        collected along the whole method resolution order, so fragments
        inherited by a subclass are found as well.  Calling this method
        again (e.g. when the same inliner is used for another run) does not
        add a handler that is already registered.
        """
        # lookahead and look-behind expressions for inline markup rules
        if getattr(settings, 'character_level_inline_markup', False):
            start_string_prefix = '(^|(?<!\x00))'
            end_string_suffix = ''
        else:
            start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
                                   (punctuation_chars.openers,
                                    punctuation_chars.delimiters))
            end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
                                 (punctuation_chars.closing_delimiters,
                                  punctuation_chars.delimiters,
                                  punctuation_chars.closers))
        # Mapping used for %-interpolation of the pattern templates below:
        # the locals defined so far plus the class-level fragments.
        args = locals().copy()
        # Walk the MRO from the most basic class to the concrete one so that
        # inherited fragments are present and subclass overrides win.
        # (`vars(self.__class__)` alone omits inherited attributes.)
        for klass in reversed(self.__class__.__mro__):
            args.update(vars(klass))

        parts = ('initial_inline', start_string_prefix, '',
                 [
                  ('start', '', self.non_whitespace_after,  # simple start-strings
                   [r'\*\*',                # strong
                    r'\*(?!\*)',            # emphasis but not strong
                    r'``',                  # literal
                    r'_`',                  # inline internal target
                    r'\|(?!\|)']            # substitution reference
                   ),
                  ('whole', '', end_string_suffix,  # whole constructs
                   [  # reference name & end-string
                    r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
                    ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
                     [r'[0-9]+',                     # manually numbered
                      r'\#(%s)?' % self.simplename,  # auto-numbered (w/ label?)
                      r'\*',                         # auto-symbol
                      r'(?P<citationlabel>%s)' % self.simplename,  # citation ref
                      ]
                     )
                    ]
                   ),
                  ('backquote',             # interpreted text or phrase reference
                   '(?P<role>(:%s:)?)' % self.simplename,  # optional role
                   self.non_whitespace_after,
                   ['`(?!`)']               # but not literal
                   )
                  ]
                 )
        self.start_string_prefix = start_string_prefix
        self.end_string_suffix = end_string_suffix
        self.parts = parts

        self.patterns = Struct(
          initial=build_regexp(parts),
          emphasis=re.compile(self.non_whitespace_escape_before
                              + r'(\*)' + end_string_suffix),
          strong=re.compile(self.non_whitespace_escape_before
                            + r'(\*\*)' + end_string_suffix),
          interpreted_or_phrase_ref=re.compile(
              r"""
              %(non_unescaped_whitespace_escape_before)s
              (
                `
                (?P<suffix>
                  (?P<role>:%(simplename)s:)?
                  (?P<refend>__?)?
                )
              )
              %(end_string_suffix)s
              """ % args, re.VERBOSE),
          embedded_link=re.compile(
              r"""
              (
                (?:[ \n]+|^)            # spaces or beginning of line/string
                <                       # open bracket
                %(non_whitespace_after)s
                (([^<>]|\x00[<>])+)     # anything but unescaped angle brackets
                %(non_whitespace_escape_before)s
                >                       # close bracket
              )
              $                         # end of string
              """ % args, re.VERBOSE),
          literal=re.compile(self.non_whitespace_before + '(``)'
                             + end_string_suffix),
          target=re.compile(self.non_whitespace_escape_before
                            + r'(`)' + end_string_suffix),
          substitution_ref=re.compile(self.non_whitespace_escape_before
                                      + r'(\|_{0,2})'
                                      + end_string_suffix),
          email=re.compile(self.email_pattern % args + '$',
                           re.VERBOSE),
          uri=re.compile(
                (r"""
                %(start_string_prefix)s
                (?P<whole>
                  (?P<absolute>           # absolute URI
                    (?P<scheme>             # scheme (http, ftp, mailto)
                      [a-zA-Z][a-zA-Z0-9.+-]*
                    )
                    :
                    (
                      (                       # either:
                        (//?)?                  # hierarchical URI
                        %(uric)s*               # URI characters
                        %(uri_end)s             # final URI char
                      )
                      (                       # optional query
                        \?%(uric)s*
                        %(uri_end)s
                      )?
                      (                       # optional fragment
                        \#%(uric)s*
                        %(uri_end)s
                      )?
                    )
                  )
                |                       # *OR*
                  (?P<email>              # email address
                    """ + self.email_pattern + r"""
                  )
                )
                %(end_string_suffix)s
                """) % args, re.VERBOSE),
          pep=re.compile(
                r"""
                %(start_string_prefix)s
                (
                  (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
                |
                  (PEP\s+(?P<pepnum2>\d+))      # reference by name
                )
                %(end_string_suffix)s""" % args, re.VERBOSE),
          rfc=re.compile(
                r"""
                %(start_string_prefix)s
                (RFC(-|\s+)?(?P<rfcnum>\d+))
                %(end_string_suffix)s""" % args, re.VERBOSE))

        # Implicit markup: standalone URIs always, PEP/RFC references only
        # when enabled in the settings.
        implicit = [(self.patterns.uri, self.standalone_uri)]
        if settings.pep_references:
            implicit.append((self.patterns.pep, self.pep_reference))
        if settings.rfc_references:
            implicit.append((self.patterns.rfc, self.rfc_reference))
        for entry in implicit:
            # This method runs once per parser run; skip entries that an
            # earlier call already registered so the list does not keep
            # growing with duplicates.  Existing entries keep their position.
            if entry not in self.implicit_dispatch:
                self.implicit_dispatch.append(entry)
    613 
    def parse(self, text, lineno, memo, parent):
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.

        `self.patterns.initial` matches start-strings (emphasis, strong,
        interpreted, phrase reference, literal, substitution reference, and
        inline target) as well as complete constructs (simple reference,
        footnote reference).  The text is searched for such a candidate;
        the handler selected through `self.dispatch` then validates it
        (e.g., not a quoted '*' character), looks for the matching
        end-string where applicable and checks that for validity, too.  A
        missing or invalid end-string results in a warning and the
        start-string is ignored.  Implicit inline markup (e.g. standalone
        URIs) is found last, in the text left over between the constructs.

        :text: source string
        :lineno: absolute line number (cf. statemachine.get_source_and_line())
        """
        # Needs to be refactored for nested inline markup.
        # Add nested_parse() method?
        self.reporter = memo.reporter
        self.document = memo.document
        self.language = memo.language
        self.parent = parent

        find_start = self.patterns.initial.search
        handlers = self.dispatch
        rest = escape2null(text)
        result_nodes = []
        pending_text = []       # plain text not yet checked for implicit markup
        system_messages = []
        while rest:
            found = find_start(rest)
            if not found:
                break
            named = found.groupdict()
            # The first non-empty group identifies the kind of markup.
            key = (named['start'] or named['backquote']
                   or named['refend'] or named['fnend'])
            # Handlers are stored as plain functions: pass `self` explicitly.
            prefix, inline_nodes, rest, reported = handlers[key](self, found,
                                                                 lineno)
            pending_text.append(prefix)
            system_messages.extend(reported)
            if inline_nodes:
                # Flush the accumulated plain text before the new nodes.
                result_nodes.extend(
                    self.implicit_inline(''.join(pending_text), lineno))
                result_nodes.extend(inline_nodes)
                pending_text = []
        rest = ''.join(pending_text) + rest
        if rest:
            result_nodes.extend(self.implicit_inline(rest, lineno))
        return result_nodes, system_messages
    664 
    # Inline object recognition
    # -------------------------
    # See also init_customizations().
    #
    # The attributes below are regular expression *source fragments*
    # (plain strings, nothing is compiled here).  A null character
    # (\x00) in them stands for an escaping backslash: the text is run
    # through escape2null() before it is searched (see parse()).

    # Lookbehind: the previous character is not whitespace.
    non_whitespace_before = r'(?<!\s)'
    # Lookbehind: the previous character is neither whitespace nor a null.
    non_whitespace_escape_before = r'(?<![\s\x00])'
    # Lookbehind: the previous character is not a whitespace or null
    # character unless that character is itself preceded by a null.
    non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
    # Lookahead: the next character is not whitespace.
    non_whitespace_after = r'(?!\s)'
    # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
    simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
    # Valid URI characters (see RFC 2396 & RFC 2732);
    # final \x00 allows backslash escapes in URIs:
    uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
    # Delimiter indicating the end of a URI (not part of the URI):
    uri_end_delim = r"""[>]"""
    # Last URI character; same as uric but no punctuation:
    urilast = r"""[_~*/=+a-zA-Z0-9]"""
    # End of a URI (either 'urilast' or 'uric followed by a
    # uri_end_delim').  `locals()` is the class-body namespace here, so
    # the placeholders are filled from the fragments defined just above:
    uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
    # Characters allowed in the parts of an email address:
    emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
    # Template for an email address.  The %(...)s placeholders are NOT
    # interpolated at this point.
    # NOTE(review): presumably interpolated and compiled in verbose mode
    # (the template contains whitespace and "#" comments) where the
    # patterns are built -- confirm in init_customizations().
    email_pattern = r"""
          %(emailc)s+(?:\.%(emailc)s+)*   # name
          (?<!\x00)@                      # at
          %(emailc)s+(?:\.%(emailc)s*)*   # host
          %(uri_end)s                     # final URI char
          """
    691 
    def quoted_start(self, match):
        """Test if inline markup start-string is 'quoted'.

        'Quoted' in this context means the start-string is enclosed in a pair
        of matching opening/closing delimiters (not necessarily quotes).

        Special cases: a start-string at the very beginning of the text is
        never quoted (return False); a start-string at the very end of the
        text is reported as quoted (return True), because nothing follows
        that it could mark up.
        """
        source = match.string
        begin = match.start()
        if begin == 0:                  # start-string at beginning of text
            return False
        finish = match.end()
        if finish >= len(source):       # start-string at end of text
            return True  # not "quoted" but no markup start-string either
        # Compare the characters immediately around the start-string.
        return punctuation_chars.match_chars(source[begin - 1],
                                             source[finish])
    709 
    def inline_obj(self, match, lineno, end_pattern, nodeclass,
                   restore_backslashes=False):
        """
        Generic handler for inline constructs with start- and end-string.

        :match: match object whose 'start' group is the start-string
        :lineno: line number used in the warning message
        :end_pattern: compiled pattern; it is searched in the text after the
            start-string and its group 1 is the end-string
        :nodeclass: called as ``nodeclass(rawsource, text)``
        :restore_backslashes: if true, the node text is unescaped with
            backslashes restored

        Return a 5-tuple: (text before the construct, list of nodes,
        remaining text, list of system messages, end-string).

        * Quoted start-string: no nodes; the text up to and including the
          start-string is returned as "before".
        * End-string found after at least one character: one `nodeclass`
          node.
        * Otherwise: a warning plus a "problematic" node wrapping the
          start-string.
        """
        source = match.string
        start_begin = match.start('start')
        start_end = match.end('start')
        if self.quoted_start(match):
            return source[:start_end], [], source[start_end:], [], ''
        endmatch = end_pattern.search(source[start_end:])
        if not (endmatch and endmatch.start(1)):
            # No end-string, or nothing between start- and end-string.
            msg = self.reporter.warning(
                  'Inline %s start-string without end-string.'
                  % nodeclass.__name__, line=lineno)
            text = unescape(source[start_begin:start_end], True)
            prb = self.problematic(text, text, msg)
            return source[:start_begin], [prb], source[start_end:], [msg], ''
        # 1 or more chars between start- and end-string
        text = endmatch.string[:endmatch.start(1)]
        if restore_backslashes:
            text = unescape(text, True)
        # `endmatch` positions are relative to the end of the start-string.
        text_end = start_end + endmatch.end(1)
        rawsource = unescape(source[start_begin:text_end], True)
        node = nodeclass(rawsource, text)
        return (source[:start_begin], [node],
                source[text_end:], [], endmatch.group(1))
    733 
    def problematic(self, text, rawsource, message):
        """
        Return a `nodes.problematic` node cross-linked with `message`.

        The node refers to the ID set for `message` (``refid``), and the ID
        set for the node is added to `message` as a back-reference.
        """
        message_id = self.document.set_id(message, self.parent)
        node = nodes.problematic(rawsource, text, refid=message_id)
        node_id = self.document.set_id(node)
        message.add_backref(node_id)
        return node
    740 
    def emphasis(self, match, lineno):
        """
        Handle an emphasis start-string.

        Return (text before, nodes, remaining text, system messages), i.e.
        the result of `inline_obj()` without the trailing end-string.
        """
        *result, _endstring = self.inline_obj(
            match, lineno, self.patterns.emphasis, nodes.emphasis)
        return tuple(result)
    745 
    def strong(self, match, lineno):
        """
        Handle a strong-emphasis start-string.

        Return (text before, nodes, remaining text, system messages), i.e.
        the result of `inline_obj()` without the trailing end-string.
        """
        *result, _endstring = self.inline_obj(
            match, lineno, self.patterns.strong, nodes.strong)
        return tuple(result)
    750 
    def interpreted_or_phrase_ref(self, match, lineno):
        """
        Handle a single-backquote start-string.

        Depending on what follows the closing backquote, the construct is
        passed on to `phrase_ref()` (raw source ends with "_") or to
        `interpreted()` (anything else).  A role may be given as prefix or
        as suffix, but not both, and not together with a reference suffix;
        such combinations, like a missing end-string, yield a warning and a
        "problematic" node.

        Return a 4-tuple: (text before the construct, list of nodes,
        remaining text, list of system messages).
        """
        end_pattern = self.patterns.interpreted_or_phrase_ref
        source = match.string
        tick_start = match.start('backquote')
        tick_end = match.end('backquote')
        role_start = match.start('role')
        role = match.group('role')
        position = ''
        if role:
            # Drop the first and last character of the role marker.
            role = role[1:-1]
            position = 'prefix'
        elif self.quoted_start(match):
            return source[:tick_end], [], source[tick_end:], []

        endmatch = end_pattern.search(source[tick_end:])
        if not (endmatch and endmatch.start(1)):
            # No end-string, or nothing between the backquotes.
            msg = self.reporter.warning(
                  'Inline interpreted text or phrase reference start-string '
                  'without end-string.', line=lineno)
            text = unescape(source[tick_start:tick_end], True)
            prb = self.problematic(text, text, msg)
            return source[:tick_start], [prb], source[tick_end:], [msg]

        # 1 or more chars between the backquotes; `endmatch` positions are
        # relative to the end of the opening backquote.
        text_end = tick_end + endmatch.end()
        if endmatch.group('role'):
            if role:
                msg = self.reporter.warning(
                    'Multiple roles in interpreted text (both '
                    'prefix and suffix present; only one allowed).',
                    line=lineno)
                text = unescape(source[role_start:text_end], True)
                prb = self.problematic(text, text, msg)
                return source[:role_start], [prb], source[text_end:], [msg]
            role = endmatch.group('suffix')[1:-1]
            position = 'suffix'
        escaped = endmatch.string[:endmatch.start(1)]
        rawsource = unescape(source[tick_start:text_end], True)

        if rawsource[-1:] != '_':
            # Interpreted text; the raw source includes a role prefix.
            rawsource = unescape(source[role_start:text_end], True)
            nodelist, messages = self.interpreted(rawsource, escaped, role,
                                                  lineno)
            return (source[:role_start], nodelist,
                    source[text_end:], messages)

        # Trailing underscore: a phrase reference, which cannot have a role.
        if role:
            msg = self.reporter.warning(
                  'Mismatch: both interpreted text role %s and '
                  'reference suffix.' % position, line=lineno)
            text = unescape(source[role_start:text_end], True)
            prb = self.problematic(text, text, msg)
            return source[:role_start], [prb], source[text_end:], [msg]
        return self.phrase_ref(source[:tick_start], source[text_end:],
                               rawsource, escaped)
    802 
    def phrase_ref(self, before, after, rawsource, escaped, text=None):
        """
        Build the nodes for a phrase reference.

        :before: text preceding the reference; returned unchanged
        :after: text following the reference; returned unchanged
        :rawsource: raw source of the construct; a trailing "__" marks
            an anonymous reference
        :escaped: the reference text as passed in by the caller; it is
            searched for an embedded ``<URI>`` or ``<alias_>``
        :text: ignored (overwritten before use)

        Return a 4-tuple: (`before`, list of nodes, `after`, empty list
        of system messages).  The node list holds the reference and, for a
        non-anonymous reference with an embedded URI or alias, a target.

        Raise `ApplicationError` if an embedded link matched but its
        second group is empty.
        """
        # `text` is ignored (since 0.16)
        match = self.patterns.embedded_link.search(escaped)
        if match:  # embedded <URI> or <alias_>
            # The reference text is whatever precedes the embedded link.
            text = escaped[:match.start(0)]
            unescaped = unescape(text)
            rawtext = unescape(text, True)
            aliastext = match.group(2)
            rawaliastext = unescape(aliastext, True)
            underscore_escaped = rawaliastext.endswith(r'\_')
            # A trailing underscore makes the embedded text a reference
            # name, unless the underscore is escaped or the text matches
            # the URI pattern.
            if (aliastext.endswith('_')
                and not (underscore_escaped
                         or self.patterns.uri.match(aliastext))):
                aliastype = 'name'
                alias = normalize_name(unescape(aliastext[:-1]))
                target = nodes.target(match.group(1), refname=alias)
                target.indirect_reference_name = whitespace_normalize_name(
                                                    unescape(aliastext[:-1]))
            else:
                aliastype = 'uri'
                # remove unescaped whitespace
                alias_parts = split_escaped_whitespace(match.group(2))
                alias = ' '.join(''.join(part.split())
                                 for part in alias_parts)
                alias = self.adjust_uri(unescape(alias))
                # An escaped trailing underscore becomes a literal one.
                if alias.endswith(r'\_'):
                    alias = alias[:-2] + '_'
                target = nodes.target(match.group(1), refuri=alias)
                target.referenced = 1
            if not aliastext:
                raise ApplicationError('problem with embedded link: %r'
                                       % aliastext)
            if not text:
                # No reference text before the embedded link: use the
                # alias itself as the reference text.
                text = alias
                unescaped = unescape(text)
                rawtext = rawaliastext
        else:
            text = escaped
            unescaped = unescape(text)
            target = None
            rawtext = unescape(escaped, True)

        refname = normalize_name(unescaped)
        reference = nodes.reference(rawsource, text,
                                    name=whitespace_normalize_name(unescaped))
        reference[0].rawsource = rawtext

        node_list = [reference]

        if rawsource[-2:] == '__':
            # Anonymous reference: no target node is added to the result.
            if target and (aliastype == 'name'):
                reference['refname'] = alias
                self.document.note_refname(reference)
                # self.document.note_indirect_target(target) # required?
            elif target and (aliastype == 'uri'):
                reference['refuri'] = alias
            else:
                reference['anonymous'] = 1
        else:
            if target:
                # Named reference with embedded URI/alias: the target is
                # named after the reference text and returned as well.
                target['names'].append(refname)
                if aliastype == 'name':
                    reference['refname'] = alias
                    self.document.note_indirect_target(target)
                    self.document.note_refname(reference)
                else:
                    reference['refuri'] = alias
                    self.document.note_explicit_target(target, self.parent)
                # target.note_referenced_by(name=refname)
                node_list.append(target)
            else:
                reference['refname'] = refname
                self.document.note_refname(reference)
        return before, node_list, after, []
    877 
    def adjust_uri(self, uri):
        """
        Return `uri`, prefixed with "mailto:" if it matches
        `self.patterns.email`; otherwise return it unchanged.
        """
        if self.patterns.email.match(uri):
            return 'mailto:' + uri
        return uri
    884 
    def interpreted(self, rawsource, text, role, lineno):
        """
        Process interpreted text with the role function for `role`.

        Return a pair (list of nodes, list of system messages).  If
        `roles.role()` supplies no role function, an error is reported and
        a "problematic" node wrapping `rawsource` is returned instead.
        """
        role_fn, messages = roles.role(role, self.language, lineno,
                                       self.reporter)
        if not role_fn:
            msg = self.reporter.error(
                'Unknown interpreted text role "%s".' % role,
                line=lineno)
            return ([self.problematic(rawsource, rawsource, msg)],
                    messages + [msg])
        nodelist, role_messages = role_fn(role, rawsource, text, lineno, self)
        return nodelist, messages + role_messages
    897 
    def literal(self, match, lineno):
        """
        Handle an inline literal start-string.

        Backslashes are restored in the literal's text.  Return (text
        before, nodes, remaining text, system messages), i.e. the result
        of `inline_obj()` without the trailing end-string.
        """
        *result, _endstring = self.inline_obj(
            match, lineno, self.patterns.literal, nodes.literal,
            restore_backslashes=True)
        return tuple(result)
    903 
    def inline_internal_target(self, match, lineno):
        """
        Handle an inline internal target start-string.

        A successfully parsed target gets its normalized text as name and
        is registered with the document as an explicit target.

        Return (text before, nodes, remaining text, system messages).
        """
        *result, _endstring = self.inline_obj(
            match, lineno, self.patterns.target, nodes.target)
        before, inlines, remaining, sysmessages = result
        # `inlines` may be empty or hold a "problematic" node instead.
        if inlines and isinstance(inlines[0], nodes.target):
            assert len(inlines) == 1
            target = inlines[0]
            target['names'].append(normalize_name(target.astext()))
            self.document.note_explicit_target(target, self.parent)
        return before, inlines, remaining, sysmessages
    914 
    def substitution_reference(self, match, lineno):
        """
        Handle a substitution reference start-string.

        A successfully parsed substitution reference is registered with
        the document.  If the end-string ends with "_", the substitution
        reference is wrapped in a reference node: anonymous for "__",
        otherwise named after the substitution text.

        Return (text before, nodes, remaining text, system messages).
        """
        before, inlines, remaining, sysmessages, endstring = self.inline_obj(
            match, lineno, self.patterns.substitution_ref,
            nodes.substitution_reference)
        unchanged = (before, inlines, remaining, sysmessages)
        if len(inlines) != 1:
            return unchanged
        subref_node = inlines[0]
        if not isinstance(subref_node, nodes.substitution_reference):
            return unchanged    # e.g. a "problematic" node
        subref_text = subref_node.astext()
        self.document.note_substitution_ref(subref_node, subref_text)
        if endstring[-1:] != '_':
            return unchanged
        # The substitution reference is also a hyperlink reference.
        reference_node = nodes.reference(
            '|%s%s' % (subref_text, endstring), '')
        if endstring[-2:] == '__':
            reference_node['anonymous'] = 1
        else:
            reference_node['refname'] = normalize_name(subref_text)
            self.document.note_refname(reference_node)
        reference_node += subref_node
        return before, [reference_node], remaining, sysmessages
    935 
    def footnote_reference(self, match, lineno):
        """
        Handles `nodes.footnote_reference` and `nodes.citation_reference`
        elements.

        The label decides the kind of reference: a citation label gives a
        citation reference; a footnote label starting with "#" gives an
        auto-numbered footnote reference, "*" an auto-symbol one, and
        anything else a manually labelled one.

        Return (text before, [reference node], remaining text, []).
        """
        label = match.group('footnotelabel')
        refname = normalize_name(label)
        source = match.string
        before = source[:match.start('whole')]
        remaining = source[match.end('whole'):]
        rawsource = '[%s]_' % label
        if match.group('citationlabel'):
            refnode = nodes.citation_reference(rawsource, refname=refname)
            refnode += nodes.Text(label)
            self.document.note_citation_ref(refnode)
            return before, [refnode], remaining, []

        refnode = nodes.footnote_reference(rawsource)
        if refname[0] == '#':
            # auto-numbered; text after the "#" (if any) is the name
            refname = refname[1:]
            refnode['auto'] = 1
            self.document.note_autofootnote_ref(refnode)
        elif refname == '*':
            # auto-symbol; no name
            refname = ''
            refnode['auto'] = '*'
            self.document.note_symbol_footnote_ref(refnode)
        else:
            refnode += nodes.Text(label)
        if refname:
            refnode['refname'] = refname
            self.document.note_footnote_ref(refnode)
        if utils.get_trim_footnote_ref_space(self.document.settings):
            before = before.rstrip()
        return before, [refnode], remaining, []
    970 
    def reference(self, match, lineno, anonymous=False):
        """
        Handle a simple (single word) hyperlink reference.

        The reference node is marked anonymous if `anonymous` is true;
        otherwise it gets a normalized `refname` and is registered with the
        document.

        Return (text before, [reference node], remaining text, []).
        """
        referencename = match.group('refname')
        refname = normalize_name(referencename)
        rawsource = referencename + match.group('refend')
        node = nodes.reference(
            rawsource, referencename,
            name=whitespace_normalize_name(referencename))
        node[0].rawsource = referencename
        if anonymous:
            node['anonymous'] = 1
        else:
            node['refname'] = refname
            self.document.note_refname(node)
        source = match.string
        before = source[:match.start('whole')]
        remaining = source[match.end('whole'):]
        return before, [node], remaining, []
    987 
    def anonymous_reference(self, match, lineno):
        """Handle an anonymous simple reference; see `reference()`."""
        make_reference = self.reference
        return make_reference(match, lineno, anonymous=True)
    990 
    def standalone_uri(self, match, lineno):
        """
        Return a list with a reference node for a standalone URI or email
        address.

        Raise `MarkupMismatch` if the match has a scheme that is not listed
        in `urischemes.schemes` (compared in lower case).
        """
        scheme = match.group('scheme')
        if scheme and scheme.lower() not in urischemes.schemes:
            raise MarkupMismatch    # not a valid scheme
        # Email addresses get an explicit scheme in the reference URI.
        addscheme = 'mailto:' if match.group('email') else ''
        text = match.group('whole')
        refuri = addscheme + unescape(text)
        node = nodes.reference(unescape(text, True), text, refuri=refuri)
        return [node]
    1005 
    def pep_reference(self, match, lineno):
        """
        Return a list with a reference node for a PEP reference.

        The PEP number is taken from group 'pepnum1' if the matched text
        starts with "pep-", from 'pepnum2' if it starts with "PEP"; any
        other text raises `MarkupMismatch`.  The reference URI is built
        from the `pep_base_url` and `pep_file_url_template` settings.
        """
        text = match.group(0)
        if text.startswith('pep-'):
            number_group = 'pepnum1'
        elif text.startswith('PEP'):
            number_group = 'pepnum2'
        else:
            raise MarkupMismatch
        pepnum = int(unescape(match.group(number_group)))
        settings = self.document.settings
        ref = settings.pep_base_url + settings.pep_file_url_template % pepnum
        return [nodes.reference(unescape(text, True), text, refuri=ref)]
    1017 
    # Template for the part of an RFC URL that follows the `rfc_base_url`
    # setting; "%d" is replaced by the RFC number (see rfc_reference()).
    rfc_url = 'rfc%d.html'
    1019 
    def rfc_reference(self, match, lineno):
        """
        Return a list with a reference node for an RFC reference.

        Raise `MarkupMismatch` unless the matched text starts with "RFC".
        The reference URI is the `rfc_base_url` setting followed by
        `self.rfc_url` filled in with the number from group 'rfcnum'.
        """
        text = match.group(0)
        if not text.startswith('RFC'):
            raise MarkupMismatch
        rfcnum = int(unescape(match.group('rfcnum')))
        ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
        return [nodes.reference(unescape(text, True), text, refuri=ref)]
    1028 
    def implicit_inline(self, text, lineno):
        """
        Find implicit inline markup in `text`.

        Try each (pattern, method) pair in `self.implicit_dispatch` in
        turn.  For the first pattern that matches and whose method does not
        raise `MarkupMismatch`, return the nodes for the text before the
        match (found recursively), the nodes returned by the method, and
        the nodes for the text after the match (found recursively).

        Return a list of `nodes.Text` and inline element nodes; the empty
        list for empty `text`.
        """
        if not text:
            return []
        for pattern, method in self.implicit_dispatch:
            match = pattern.search(text)
            if not match:
                continue
            try:
                # Must recurse on strings before *and* after the match;
                # there may be multiple patterns.
                found = self.implicit_inline(text[:match.start()], lineno)
                found = found + method(match, lineno)
                return found + self.implicit_inline(text[match.end():],
                                                    lineno)
            except MarkupMismatch:
                # Not valid markup after all; try the next pattern.
                pass
        return [nodes.Text(text)]
    1050 
    # Handlers for the constructs found by `self.patterns.initial`, keyed
    # by the matched start-string or end-string (see parse()).  The values
    # are the plain functions defined above, not bound methods, which is
    # why parse() calls them as ``method(self, match, lineno)``.
    dispatch = {'*': emphasis,
                '**': strong,
                '`': interpreted_or_phrase_ref,
                '``': literal,
                '_`': inline_internal_target,
                ']_': footnote_reference,
                '|': substitution_reference,
                '_': reference,
                '__': anonymous_reference}
    1060 
    1061 
    1062def _loweralpha_to_int(s, _zero=(ord('a')-1)): 
    1063    return ord(s) - _zero 
    1064 
    1065 
    1066def _upperalpha_to_int(s, _zero=(ord('A')-1)): 
    1067    return ord(s) - _zero 
    1068 
    1069 
    1070def _lowerroman_to_int(s): 
    1071    return roman.fromRoman(s.upper()) 
    1072 
    1073 
    1074class Body(RSTState): 
    1075 
    1076    """ 
    1077    Generic classifier of the first line of a block. 
    1078    """ 
    1079 
    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    enum = Struct()
    """Enumerated list parsing information."""

    # Enumerator formats: literal prefix and suffix around the ordinal.
    # NOTE(review): `start`/`end` look like slice bounds for cutting the
    # ordinal out of the enumerator text (length of the prefix, -1 for
    # the one-character suffix) -- confirm in parse_enumerator().
    enum.formatinfo = {
          'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
          'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
          'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    # Names of the supported enumeration sequences; the order matters.
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman']  # ORDERED!
    # Regular expression source for an ordinal of each sequence:
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+'}
    # Functions converting the ordinal text of each sequence to an integer:
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': _lowerroman_to_int,
                       'upperroman': roman.fromRoman}

    # Compiled version of each sequence pattern, anchored at the end
    # with "$".  (The loop variable `sequence` stays bound in the class
    # namespace after the loop.)
    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        enum.sequenceregexps[sequence] = re.compile(
              enum.sequencepats[sequence] + '$')

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table)."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    pats = {}
    """Fragments of patterns used by transitions."""

    # Character classes.  'nonalphanum7bit' covers the four ASCII ranges
    # "!" to "/", ":" to "@", "[" to "`" and "{" to "~".
    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    # Any ordinal of the known sequences, or "#":
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    # Option list fragments; each is built from the fragments defined
    # before it, so the order of these assignments matters.
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern?  Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    # One named group per enumerator format: escaped prefix, ordinal,
    # escaped suffix.  (The loop variable `format` hides the builtin of
    # the same name within this class body and stays bound afterwards.)
    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
              format, re.escape(enum.formatinfo[format].prefix),
              pats['enum'], re.escape(enum.formatinfo[format].suffix))

    # Pattern for each transition, keyed by transition name.  Most values
    # are pattern source strings, the two table patterns are already
    # compiled.  The empty 'text' pattern matches any line.
    patterns = {
          'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
          'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
          'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
          'option_marker': r'%(option)s(, %(option)s)*(  +| ?$)' % pats,
          'doctest': r'>>>( +|$)',
          'line_block': r'\|( +|$)',
          'grid_table_top': grid_table_top_pat,
          'simple_table_top': simple_table_top_pat,
          'explicit_markup': r'\.\.( +|$)',
          'anonymous': r'__( +|$)',
          'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
          'text': r''}
    # Names of the transitions of this state; every name is a key of
    # `patterns`.
    # NOTE(review): presumably tried in this order, with the catch-all
    # 'text' last -- confirm against the state machine.
    initial_transitions = (
          'bullet',
          'enumerator',
          'field_marker',
          'option_marker',
          'doctest',
          'line_block',
          'grid_table_top',
          'simple_table_top',
          'explicit_markup',
          'anonymous',
          'line',
          'text')
    1165 
    def indent(self, match, context, next_state):
        """Block quote.

        Parse the indented block at the current position as block
        quote(s) and add the results to the parent; add a warning if the
        block does not end with a blank line.
        """
        block = self.state_machine.get_indented()
        quoted_lines, _indent, first_offset, blank_finish = block
        elements = self.block_quote(quoted_lines, first_offset)
        self.parent += elements
        if not blank_finish:
            self.parent += self.unindent_warning('Block quote')
        return context, next_state, []
    1175 
    def block_quote(self, indented, line_offset):
        """
        Parse `indented` into one or more block quotes.

        An attribution (see `split_attribution()`) ends a block quote;
        lines following it start a new one.  Blank lines between block
        quotes are skipped.

        Return a list of block quote nodes, interspersed with any system
        messages from parsing the attributions.
        """
        elements = []
        while indented:
            blockquote = nodes.block_quote(rawsource='\n'.join(indented))
            source, line = self.state_machine.get_source_and_line(
                line_offset+1)
            blockquote.source, blockquote.line = source, line
            parts = self.split_attribution(indented, line_offset)
            (blockquote_lines, attribution_lines, attribution_offset,
             indented, new_line_offset) = parts
            self.nested_parse(blockquote_lines, line_offset, blockquote)
            elements.append(blockquote)
            if attribution_lines:
                attribution, messages = self.parse_attribution(
                    attribution_lines, line_offset+attribution_offset)
                blockquote += attribution
                elements += messages
            line_offset = new_line_offset
            # Skip blank lines before the next block quote.
            while indented and not indented[0]:
                indented = indented[1:]
                line_offset += 1
        return elements
    1199 
    # Marker at the start of a block quote attribution: "--", "---" (not
    # followed by another "-") or an em-dash, then optional spaces, and
    # then a character that is neither a space nor a newline.
    # U+2014 is an em-dash:
    attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
    1202 
    def split_attribution(self, indented, line_offset):
        """
        Check for a block quote attribution and split it off:

        * First line after a blank line must begin with a dash ("--", "---",
          em-dash; matches `self.attribution_pattern`).
        * Every line after that must have consistent indentation.
        * Attributions must be preceded by block quote content.

        Return a tuple of: (block quote content lines, attribution lines,
        attribution offset, remaining indented lines, remaining lines offset).
        If there is no attribution, the first item is `indented` and the
        other four are None.
        """
        last_blank = None
        content_seen = False
        for index, raw_line in enumerate(indented):
            line = raw_line.rstrip()
            if not line:
                last_blank = index
                continue
            if content_seen and last_blank == index - 1:  # last line blank
                match = self.attribution_pattern.match(line)
                if match:
                    attribution_end, indent = self.check_attribution(
                        indented, index)
                    if attribution_end:
                        a_lines = indented[index:attribution_end]
                        # Remove the marker from the first line and the
                        # common indentation from the following lines.
                        a_lines.trim_left(match.end(), end=1)
                        a_lines.trim_left(indent, start=1)
                        return (indented[:index], a_lines,
                                index, indented[attribution_end:],
                                line_offset + attribution_end)
            content_seen = True
        return indented, None, None, None, None
    1237 
    def check_attribution(self, indented, attribution_start):
        """
        Check attribution shape.

        The attribution starts at index `attribution_start` of `indented`
        and extends up to the next blank line or the end of `indented`.
        All lines after the first one must have the same indentation.

        Return the index past the end of the attribution, and the indent
        of its continuation lines (0 if there are none).  Return
        ``(None, None)`` if the continuation lines are not consistently
        indented (bad shape; not an attribution).
        """
        indent = None
        # Unless a blank line is found, the attribution runs to the end of
        # `indented`.  This also covers an attribution whose first line is
        # the last line of `indented` (no continuation lines to loop over).
        end = len(indented)
        for i in range(attribution_start + 1, len(indented)):
            line = indented[i].rstrip()
            if not line:
                # A blank line ends the attribution.
                end = i
                break
            line_indent = len(line) - len(line.lstrip())
            if indent is None:
                # The first continuation line sets the expected indent.
                indent = line_indent
            elif line_indent != indent:
                return None, None       # bad shape; not an attribution
        return end, (indent or 0)
    1257 
    def parse_attribution(self, indented, line_offset):
        """
        Return an attribution node for the lines in `indented`, and the
        list of system messages from parsing their inline markup.

        :line_offset: zero-based offset of the attribution's first line
        """
        lineno = line_offset + 1    # line_offset is zero-based
        text = '\n'.join(indented).rstrip()
        textnodes, messages = self.inline_text(text, lineno)
        attribution = nodes.attribution(text, '', *textnodes)
        source, line = self.state_machine.get_source_and_line(lineno)
        attribution.source = source
        attribution.line = line
        return attribution, messages
    1265 
    def bullet(self, match, context, next_state):
        """Bullet list item.

        Start a new bullet list in the parent, parse its first item, and
        parse the following lines with 'BulletList' as initial state so
        that further items are added to the same list.  Add a warning if
        the list does not end with a blank line.
        """
        bullet_list = nodes.bullet_list()
        source, line = self.state_machine.get_source_and_line()
        bullet_list.source, bullet_list.line = source, line
        self.parent += bullet_list
        # The bullet character is the first character of the line.
        bullet_list['bullet'] = match.string[0]
        first_item, blank_finish = self.list_item(match.end())
        bullet_list += first_item
        state_machine = self.state_machine
        next_line = state_machine.line_offset + 1
        new_line_offset, blank_finish = self.nested_list_parse(
            state_machine.input_lines[next_line:],
            input_offset=state_machine.abs_line_offset() + 1,
            node=bullet_list, initial_state='BulletList',
            blank_finish=blank_finish)
        self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Bullet list')
        return [], next_state, []
    1284 
    def list_item(self, indent):
        """
        Parse one list item whose text starts at column `indent`.

        Return a tuple ``(list_item node, blank_finish)``.
        """
        source, source_line = self.state_machine.get_source_and_line()
        if not self.state_machine.line[indent:]:
            # Nothing follows the marker on the current line.
            block, indent, block_offset, blank_finish = (
                self.state_machine.get_first_known_indented(indent))
        else:
            block, block_offset, blank_finish = (
                self.state_machine.get_known_indented(indent))
        item = nodes.list_item('\n'.join(block))
        item.source = source
        item.line = source_line
        if block:
            self.nested_parse(block, input_offset=block_offset, node=item)
        return item, blank_finish
    1299 
    def enumerator(self, match, context, next_state):
        """
        Enumerated List Item

        Raise `statemachine.TransitionCorrection` ('text') if the line turns
        out not to start an enumerated list.
        """
        fmt, sequence, text, ordinal = self.parse_enumerator(match)
        if not self.is_enumerated_list_item(ordinal, sequence, fmt):
            raise statemachine.TransitionCorrection('text')
        enum_list = nodes.enumerated_list()
        self.parent += enum_list
        # auto-enumerated lists ("#") are recorded as arabic
        enum_list['enumtype'] = 'arabic' if sequence == '#' else sequence
        enum_list['prefix'] = self.enum.formatinfo[fmt].prefix
        enum_list['suffix'] = self.enum.formatinfo[fmt].suffix
        if ordinal != 1:
            enum_list['start'] = ordinal
            notice = self.reporter.info(
                'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
                % (text, ordinal))
            self.parent += notice
        first_item, blank_finish = self.list_item(match.end())
        enum_list += first_item
        next_offset = self.state_machine.line_offset + 1   # next line
        remaining_lines = self.state_machine.input_lines[next_offset:]
        absolute_offset = self.state_machine.abs_line_offset() + 1
        settings = {'lastordinal': ordinal,
                    'format': fmt,
                    'auto': sequence == '#'}
        end_offset, blank_finish = self.nested_list_parse(
            remaining_lines,
            input_offset=absolute_offset,
            node=enum_list,
            initial_state='EnumeratedList',
            blank_finish=blank_finish,
            extra_settings=settings)
        self.goto_line(end_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Enumerated list')
        return [], next_state, []
    1334 
    def parse_enumerator(self, match, expected_sequence=None):
        """
        Analyze an enumerator and return the results.

        :Return:
            - the enumerator format ('period', 'parens', or 'rparen'),
            - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
            - the text of the enumerator, stripped of formatting, and
            - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
              ``None`` is returned for invalid enumerator text).

        :Exception: `ParserError` if no enumerator format or sequence
            matches, or if `expected_sequence` is not a known sequence.

        The enumerator format has already been determined by the regular
        expression match. If `expected_sequence` is given, that sequence is
        tried first. If not, we check for Roman numeral 1. This way,
        single-character Roman numerals (which are also alphabetical) can be
        matched. If no sequence has been matched, all sequences are checked in
        order.
        """
        groupdict = match.groupdict()
        for format in self.enum.formats:
            if groupdict[format]:       # was this the format matched?
                break                   # yes; keep `format`
        else:                           # shouldn't happen
            raise ParserError('enumerator format not matched')
        formatinfo = self.enum.formatinfo[format]
        # Slice the bare enumerator text out of the matched group, using the
        # start/end offsets recorded for this format.
        text = groupdict[format][formatinfo.start:formatinfo.end]
        sequence = ''
        if text == '#':
            sequence = '#'
        elif expected_sequence:
            try:
                regexp = self.enum.sequenceregexps[expected_sequence]
            except KeyError:            # shouldn't happen
                # Report the sequence that was actually asked for
                # (`sequence` is still empty at this point).
                raise ParserError('unknown enumerator sequence: %s'
                                  % expected_sequence)
            if regexp.match(text):
                sequence = expected_sequence
        elif text == 'i':
            sequence = 'lowerroman'
        elif text == 'I':
            sequence = 'upperroman'
        if not sequence:
            # Nothing matched so far: try all known sequences in order.
            for sequence in self.enum.sequences:
                if self.enum.sequenceregexps[sequence].match(text):
                    break
            else:                       # shouldn't happen
                raise ParserError('enumerator sequence not matched')
        if sequence == '#':
            ordinal = 1
        else:
            try:
                ordinal = self.enum.converters[sequence](text)
            except roman.InvalidRomanNumeralError:
                ordinal = None
        return format, sequence, text, ordinal
    1389 
    def is_enumerated_list_item(self, ordinal, sequence, format):
        """
        Check validity based on the ordinal value and the second line.

        Return true (1) if the ordinal is valid and the second line is
        missing (end of input), blank, indented, or starts with the next
        enumerator or an auto-enumerator.  Return ``None`` otherwise.
        """
        if ordinal is None:
            return None
        # Peek at the following line, then step back again.
        try:
            next_line = self.state_machine.next_line()
        except EOFError:              # end of input lines
            at_end_of_input = True
        else:
            at_end_of_input = False
        self.state_machine.previous_line()
        if at_end_of_input:
            return 1
        if not next_line[:1].strip():   # blank or indented
            return 1
        result = self.make_enumerator(ordinal + 1, sequence, format)
        if not result:
            return None
        next_enumerator, auto_enumerator = result
        try:
            is_continued = (next_line.startswith(next_enumerator)
                            or next_line.startswith(auto_enumerator))
        except TypeError:
            return None
        if is_continued:
            return 1
        return None
    1418 
    def make_enumerator(self, ordinal, sequence, format):
        """
        Construct and return the next enumerated list item marker, and an
        auto-enumerator ("#" instead of the regular enumerator).

        Both markers carry the prefix and suffix of `format` plus a trailing
        space.  Return ``None`` for invalid (out of range) ordinals.
        """
        if sequence == '#':
            label = '#'
        elif sequence == 'arabic':
            label = str(ordinal)
        else:
            # alphabetic or Roman sequence, in lower or upper case
            if sequence.endswith('alpha'):
                if ordinal > 26:
                    return None
                label = chr(ord('a') + ordinal - 1)
            elif sequence.endswith('roman'):
                try:
                    label = roman.toRoman(ordinal)
                except roman.RomanError:
                    return None
            else:                       # shouldn't happen
                raise ParserError('unknown enumerator sequence: "%s"'
                                  % sequence)
            if sequence.startswith('lower'):
                label = label.lower()
            elif sequence.startswith('upper'):
                label = label.upper()
            else:                       # shouldn't happen
                raise ParserError('unknown enumerator sequence: "%s"'
                                  % sequence)
        info = self.enum.formatinfo[format]
        markers = [info.prefix + symbol + info.suffix + ' '
                   for symbol in (label, '#')]
        return markers[0], markers[1]
    1455 
    def field_marker(self, match, context, next_state):
        """
        Field list item.

        Create a `field_list`, parse the first field, then let a nested
        'FieldList' parse consume the remaining fields.
        """
        fields = nodes.field_list()
        self.parent += fields
        first_field, blank_finish = self.field(match)
        fields += first_field
        next_offset = self.state_machine.line_offset + 1   # next line
        remaining_lines = self.state_machine.input_lines[next_offset:]
        absolute_offset = self.state_machine.abs_line_offset() + 1
        end_offset, blank_finish = self.nested_list_parse(
            remaining_lines,
            input_offset=absolute_offset,
            node=fields,
            initial_state='FieldList',
            blank_finish=blank_finish)
        self.goto_line(end_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Field list')
        return [], next_state, []
    1472 
    def field(self, match):
        """
        Parse one field (name and body) starting at the current line.

        Return a tuple ``(field node, blank_finish)``.
        """
        name = self.parse_field_marker(match)
        source, source_line = self.state_machine.get_source_and_line()
        lineno = self.state_machine.abs_line_number()
        body_lines, indent, body_offset, blank_finish = (
            self.state_machine.get_first_known_indented(match.end()))
        field_node = nodes.field()
        field_node.source, field_node.line = source, source_line
        name_nodes, name_messages = self.inline_text(name, lineno)
        field_node += nodes.field_name(name, '', *name_nodes)
        # System messages from parsing the name are placed in the body.
        body_node = nodes.field_body('\n'.join(body_lines), *name_messages)
        field_node += body_node
        if body_lines:
            self.parse_field_body(body_lines, body_offset, body_node)
        return field_node, blank_finish
    1489 
    def parse_field_marker(self, match):
        """Extract & return field name from a field marker match."""
        marker = match.group()
        # Skip the leading ':' and cut at the last ':' found after it
        # (dropping the trailing ':' and whatever follows).
        return marker[1:marker.rfind(':', 1)]
    1495 
    def parse_field_body(self, indented, offset, node):
        """Parse the field body lines `indented` into `node`."""
        self.nested_parse(indented, node=node, input_offset=offset)
    1498 
    def option_marker(self, match, context, next_state):
        """
        Option list item.

        Create an `option_list`, parse the first item, then let a nested
        'OptionList' parse consume the remaining items.  If the marker is
        invalid, report an error and add the text as a block quote instead.
        """
        option_list = nodes.option_list()
        source, source_line = self.state_machine.get_source_and_line()
        option_list.source = source
        option_list.line = source_line
        try:
            first_item, blank_finish = self.option_list_item(match)
        except MarkupError as error:
            # This shouldn't happen; pattern won't match.
            report = self.reporter.error('Invalid option list marker: %s'
                                         % error)
            self.parent += report
            block, indent, block_offset, blank_finish = (
                self.state_machine.get_first_known_indented(match.end()))
            quoted = self.block_quote(block, block_offset)
            self.parent += quoted
            if not blank_finish:
                self.parent += self.unindent_warning('Option list')
            return [], next_state, []
        self.parent += option_list
        option_list += first_item
        next_offset = self.state_machine.line_offset + 1   # next line
        remaining_lines = self.state_machine.input_lines[next_offset:]
        absolute_offset = self.state_machine.abs_line_offset() + 1
        end_offset, blank_finish = self.nested_list_parse(
            remaining_lines,
            input_offset=absolute_offset,
            node=option_list,
            initial_state='OptionList',
            blank_finish=blank_finish)
        self.goto_line(end_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
    1530 
    def option_list_item(self, match):
        """
        Parse one option list item (option group plus description).

        Return a tuple ``(option_list_item node, blank_finish)``.  If no
        description text is found, return to the starting line and raise
        `statemachine.TransitionCorrection` ('text').
        """
        start_offset = self.state_machine.abs_line_offset()
        options = self.parse_option_marker(match)
        block, indent, block_offset, blank_finish = (
            self.state_machine.get_first_known_indented(match.end()))
        if not block:                   # not an option list item
            self.goto_line(start_offset)
            raise statemachine.TransitionCorrection('text')
        group = nodes.option_group('', *options)
        description = nodes.description('\n'.join(block))
        item = nodes.option_list_item('', group, description)
        # `block` is known to be non-empty here.
        self.nested_parse(block, input_offset=block_offset, node=description)
        return item, blank_finish
    1547 
    def parse_option_marker(self, match):
        """
        Return a list of `node.option` and `node.option_argument` objects,
        parsed from an option marker match.

        :Exception: `MarkupError` for invalid option markers.
        """
        options = []
        marker_text = match.group().rstrip()
        # split at ", ", except inside < > (complex arguments)
        for optionstring in re.split(r', (?![^<]*>)', marker_text):
            tokens = optionstring.split()
            delimiter = ' '
            head = tokens[0]
            name_and_value = head.split('=', 1)
            if len(name_and_value) > 1:
                # "--opt=value" form
                tokens[:1] = name_and_value
                delimiter = '='
            elif len(head) > 2 and (
                    head.startswith('+')
                    or (head.startswith('-')
                        and not head.startswith('--'))):
                # "-ovalue" form
                tokens[:1] = [head[:2], head[2:]]
                delimiter = ''
            if (len(tokens) > 1
                    and tokens[1].startswith('<')
                    and tokens[-1].endswith('>')):
                # "-o <value1 value2>" form; join all values into one token
                tokens[1:] = [' '.join(tokens[1:])]
            if not 0 < len(tokens) <= 2:
                raise MarkupError(
                    'wrong number of option tokens (=%s), should be 1 or 2: '
                    '"%s"' % (len(tokens), optionstring))
            option = nodes.option(optionstring)
            option += nodes.option_string(tokens[0], tokens[0])
            if len(tokens) > 1:
                option += nodes.option_argument(tokens[1], tokens[1],
                                                delimiter=delimiter)
            options.append(option)
        return options
    1589 
    def doctest(self, match, context, next_state):
        """Add the current text block to the parent as a `doctest_block`."""
        text = '\n'.join(self.state_machine.get_text_block())
        # TODO: prepend class value ['pycon'] (Python Console)
        # parse with `directives.body.CodeBlock` (returns literal-block
        # with class "code" and syntax highlight markup).
        block_node = nodes.doctest_block(text, text)
        self.parent += block_node
        return [], next_state, []
    1597 
    def line_block(self, match, context, next_state):
        """
        First line of a line block.

        Parse the first line, let a nested 'LineBlock' parse consume the
        following lines, then nest the collected lines by indentation.
        """
        block = nodes.line_block()
        self.parent += block
        lineno = self.state_machine.abs_line_number()
        first_line, messages, blank_finish = self.line_block_line(match,
                                                                  lineno)
        block += first_line
        self.parent += messages
        if not blank_finish:
            next_offset = self.state_machine.line_offset + 1   # next line
            remaining_lines = self.state_machine.input_lines[next_offset:]
            absolute_offset = self.state_machine.abs_line_offset() + 1
            end_offset, blank_finish = self.nested_list_parse(
                remaining_lines,
                input_offset=absolute_offset,
                node=block,
                initial_state='LineBlock',
                blank_finish=0)
            self.goto_line(end_offset)
        if not blank_finish:
            self.parent += self.reporter.warning(
                'Line block ends without a blank line.',
                line=lineno+1)
        if len(block):
            # The first line anchors the indentation of the whole block.
            if block[0].indent is None:
                block[0].indent = 0
            self.nest_line_block_lines(block)
        return [], next_state, []
    1623 
    def line_block_line(self, match, lineno):
        """
        Return one line element of a line_block.

        The result is a tuple ``(line node, messages, blank_finish)``.  The
        node's `indent` attribute is only set for non-empty lines.
        """
        block, indent, block_offset, blank_finish = (
            self.state_machine.get_first_known_indented(match.end(),
                                                        until_blank=True))
        rawtext = '\n'.join(block)
        children, messages = self.inline_text(rawtext, lineno)
        line_node = nodes.line(rawtext, '', *children)
        is_empty = match.string.rstrip() == '|'
        if not is_empty:
            line_node.indent = len(match.group(1)) - 1
        return line_node, messages, blank_finish
    1635 
    def nest_line_block_lines(self, block):
        """
        Give every line without an indent the indent of its predecessor,
        then nest the lines of `block` via `nest_line_block_segment()`.
        """
        for position, line in enumerate(block):
            if position == 0:
                continue                # the first line is left untouched
            if getattr(line, 'indent', None) is None:
                line.indent = block[position - 1].indent
        self.nest_line_block_segment(block)
    1641 
    def nest_line_block_segment(self, block):
        """
        Regroup the items of `block` in place by indentation.

        Consecutive items indented deeper than the least-indented item are
        collected into a new `line_block`, which is regrouped recursively
        and takes their place.
        """
        least = min(item.indent for item in block)
        regrouped = []
        pending = nodes.line_block()
        for item in block:
            if item.indent > least:
                pending.append(item)
                continue
            if len(pending):
                # Close the run of deeper-indented items collected so far.
                self.nest_line_block_segment(pending)
                regrouped.append(pending)
                pending = nodes.line_block()
            regrouped.append(item)
        if len(pending):
            self.nest_line_block_segment(pending)
            regrouped.append(pending)
        block[:] = regrouped
    1660 
    def grid_table_top(self, match, context, next_state):
        """Top border of a full table."""
        isolate_function = self.isolate_grid_table
        parser_class = tableparser.GridTableParser
        return self.table_top(match, context, next_state,
                              isolate_function, parser_class)
    1666 
    def simple_table_top(self, match, context, next_state):
        """Top border of a simple table."""
        isolate_function = self.isolate_simple_table
        parser_class = tableparser.SimpleTableParser
        return self.table_top(match, context, next_state,
                              isolate_function, parser_class)
    1672 
    def table_top(self, match, context, next_state,
                  isolate_function, parser_class):
        """
        Top border of a generic table.

        Add the nodes returned by `table()` to the parent and warn if the
        table is not followed by a blank line.
        """
        table_nodes, blank_finish = self.table(isolate_function, parser_class)
        self.parent += table_nodes
        if blank_finish:
            return [], next_state, []
        following_line = self.state_machine.abs_line_number() + 1
        warning = self.reporter.warning('Blank line required after table.',
                                        line=following_line)
        self.parent += warning
        return [], next_state, []
    1684 
    def table(self, isolate_function, parser_class):
        """
        Parse a table.

        `isolate_function` supplies the table's text block, which is parsed
        by an instance of `parser_class`.  Return a tuple
        ``(nodelist, blank_finish)``; a `tableparser.TableMarkupError` is
        turned into the nodes returned by `malformed_table()`.
        """
        block, messages, blank_finish = isolate_function()
        if not block:
            return messages, blank_finish
        try:
            tabledata = parser_class().parse(block)
            # line number of the first line of the table
            first_line = self.state_machine.abs_line_number() - len(block) + 1
            nodelist = [self.build_table(tabledata, first_line)] + messages
        except tableparser.TableMarkupError as err:
            nodelist = self.malformed_table(block, ' '.join(err.args),
                                            offset=err.offset) + messages
        return nodelist, blank_finish
    1702 
    def isolate_grid_table(self):
        """
        Extract the text block of a grid table from the input.

        Return a tuple ``(block, messages, blank_finish)``.  `block` holds
        the stripped table lines, or is an empty list if the table is
        malformed (the errors are then part of `messages`).  `blank_finish`
        is set to 0 whenever the text block had to be cut short or
        unexpected indentation was found.
        """
        messages = []
        blank_finish = 1
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            # Continue with the block delivered by the exception, but
            # report the indentation problem.
            block, src, srcline = err.args
            messages.append(self.reporter.error('Unexpected indentation.',
                                                source=src, line=srcline))
            blank_finish = 0
        block.disconnect()
        # for East Asian chars:
        block.pad_double_width(self.double_width_pad_char)
        # The first line sets the expected width of all table lines.
        width = len(block[0].strip())
        for i in range(len(block)):
            block[i] = block[i].strip()
            if block[i][0] not in '+|':  # check left edge
                # Not a table line: drop it and everything after it from
                # the block and move the state machine back accordingly.
                blank_finish = 0
                self.state_machine.previous_line(len(block) - i)
                del block[i:]
                break
        if not self.grid_table_top_pat.match(block[-1]):  # find bottom
            blank_finish = 0
            # from second-last to third line of table:
            for i in range(len(block) - 2, 1, -1):
                if self.grid_table_top_pat.match(block[i]):
                    # Border line found: it becomes the bottom of the
                    # table; the lines after it are removed from the block.
                    self.state_machine.previous_line(len(block) - i + 1)
                    del block[i+1:]
                    break
            else:
                # no border line found in the scanned range
                messages.extend(self.malformed_table(block))
                return [], messages, blank_finish
        for i in range(len(block)):     # check right edge
            if len(block[i]) != width or block[i][-1] not in '+|':
                messages.extend(self.malformed_table(block))
                return [], messages, blank_finish
        return block, messages, blank_finish
    1740 
    def isolate_simple_table(self):
        """
        Extract the text block of a simple table from the input.

        Starting at the current line (the top border), scan the following
        input lines for border lines.  Return a tuple
        ``(block, messages, blank_finish)``; `block` is an empty list if the
        table is malformed (the error is then in `messages`).
        """
        start = self.state_machine.line_offset
        lines = self.state_machine.input_lines
        limit = len(lines) - 1          # index of the last input line
        toplen = len(lines[start].strip())
        pattern_match = self.simple_table_border_pat.match
        found = 0                       # number of border lines seen
        found_at = None                 # index of the latest border line
        i = start + 1
        while i <= limit:
            line = lines[i]
            match = pattern_match(line)
            if match:
                if len(line.strip()) != toplen:
                    # border line whose length differs from the top border
                    self.state_machine.next_line(i - start)
                    messages = self.malformed_table(
                        lines[start:i+1], 'Bottom/header table border does '
                        'not match top border.')
                    return [], messages, i == limit or not lines[i+1].strip()
                found += 1
                found_at = i
                # The table ends at the second border line, or at a border
                # line that is the last input line or is followed by a
                # blank line.
                if found == 2 or i == limit or not lines[i+1].strip():
                    end = i
                    break
            i += 1
        else:                           # reached end of input_lines
            if found:
                # A border was seen, but it did not qualify as the end of
                # the table: report the lines up to that border.
                extra = ' or no blank line after table bottom'
                self.state_machine.next_line(found_at - start)
                block = lines[start:found_at+1]
            else:
                # no border at all: report all remaining lines
                extra = ''
                self.state_machine.next_line(i - start - 1)
                block = lines[start:]
            messages = self.malformed_table(
                block, 'No bottom table border found%s.' % extra)
            return [], messages, not extra
        # Move the state machine to the bottom border of the table.
        self.state_machine.next_line(end - start)
        block = lines[start:end+1]
        # for East Asian chars:
        block.pad_double_width(self.double_width_pad_char)
        return block, [], end == limit or not lines[end+1].strip()
    1783 
    def malformed_table(self, block, detail='', offset=0):
        """
        Return a one-element list with a "Malformed table." error.

        The error carries the text of `block` as a literal block.  `detail`,
        if given, is appended to the message on a new line; `offset` is
        added to the line number of the first line of `block`.
        """
        block.replace(self.double_width_pad_char, '')
        text = '\n'.join(block)
        first_line = self.state_machine.abs_line_number() - len(block) + 1
        if detail:
            message = 'Malformed table.' + '\n' + detail
        else:
            message = 'Malformed table.'
        literal = nodes.literal_block(text, text)
        return [self.reporter.error(message, literal,
                                    line=first_line+offset)]
    1794 
    def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
        """
        Build and return a `table` node from parsed `tabledata`.

        `tabledata` is a ``(colwidths, headrows, bodyrows)`` triple and
        `tableline` is passed on to `build_table_row()` for every row.  The
        first `stub_columns` columns are marked as stubs; `widths` selects
        the "colwidths-auto" or "colwidths-given" class.
        """
        colwidths, headrows, bodyrows = tabledata
        table = nodes.table()
        if widths == 'auto':
            table['classes'] += ['colwidths-auto']
        elif widths:  # "grid" or list of integers
            table['classes'] += ['colwidths-given']
        tgroup = nodes.tgroup(cols=len(colwidths))
        table += tgroup
        stubs_left = stub_columns
        for width in colwidths:
            colspec = nodes.colspec(colwidth=width)
            if stubs_left:
                colspec.attributes['stub'] = 1
                stubs_left -= 1
            tgroup += colspec
        if headrows:
            thead = nodes.thead()
            tgroup += thead
            for rowdata in headrows:
                thead += self.build_table_row(rowdata, tableline)
        tbody = nodes.tbody()
        tgroup += tbody
        for rowdata in bodyrows:
            tbody += self.build_table_row(rowdata, tableline)
        return table
    1820 
    def build_table_row(self, rowdata, tableline):
        """
        Build and return a `row` node from the cells in `rowdata`.

        ``None`` cells are skipped.  Each other cell is a
        ``(morerows, morecols, offset, cellblock)`` tuple; non-empty cell
        content is parsed into the cell's `entry` node.
        """
        row = nodes.row()
        for cell in rowdata:
            if cell is not None:
                morerows, morecols, offset, cellblock = cell
                spans = (('morerows', morerows), ('morecols', morecols))
                # only non-zero spans become attributes
                attributes = {key: value for key, value in spans if value}
                entry = nodes.entry(**attributes)
                row += entry
                if ''.join(cellblock):
                    self.nested_parse(cellblock,
                                      input_offset=tableline+offset,
                                      node=entry)
        return row
    1838 
    explicit = Struct()
    """Patterns and constants used for explicit markup recognition."""

    # Compiled verbose regular expressions.  The ``%(...)s`` placeholders
    # are filled in from the class attributes of `Inliner`.
    explicit.patterns = Struct(
          # `target`: either a single underscore (anonymous target) or an
          # optionally backquoted reference name (group "name"), followed
          # by a colon and whitespace or the end of the string.
          target=re.compile(r"""
                            (
                              _               # anonymous target
                            |               # *OR*
                              (?!_)           # no underscore at the beginning
                              (?P<quote>`?)   # optional open quote
                              (?![ `])        # first char. not space or
                                              # backquote
                              (?P<name>       # reference name
                                .+?
                              )
                              %(non_whitespace_escape_before)s
                              (?P=quote)      # close quote if open quote used
                            )
                            (?<!(?<!\x00):) # no unescaped colon at end
                            %(non_whitespace_escape_before)s
                            [ ]?            # optional space
                            :               # end of reference name
                            ([ ]+|$)        # followed by whitespace
                            """ % vars(Inliner), re.VERBOSE),
          # `reference`: a simple name followed by "_" (group "simple") or
          # a backquoted phrase followed by "_" (group "phrase"), ending at
          # the end of the string.
          reference=re.compile(r"""
                               (
                                 (?P<simple>%(simplename)s)_
                               |                  # *OR*
                                 `                  # open backquote
                                 (?![ ])            # not space
                                 (?P<phrase>.+?)    # hyperlink phrase
                                 %(non_whitespace_escape_before)s
                                 `_                 # close backquote,
                                                    # reference mark
                               )
                               $                  # end of string
                               """ % vars(Inliner), re.VERBOSE),
          # `substitution`: substitution text (group "name") up to the
          # closing "|", followed by whitespace or the end of the string.
          substitution=re.compile(r"""
                                  (
                                    (?![ ])          # first char. not space
                                    (?P<name>.+?)    # substitution text
                                    %(non_whitespace_escape_before)s
                                    \|               # close delimiter
                                  )
                                  ([ ]+|$)           # followed by whitespace
                                  """ % vars(Inliner),
                                  re.VERBOSE),)
    1886 
    def footnote(self, match):
        """
        Parse a footnote and register it with the document.

        `match` is the footnote construct match; group 1 is the label.
        Return a 2-tuple: a one-element list holding the footnote node,
        and the "blank finish" flag of its indented block.
        """
        source, line = self.state_machine.get_source_and_line()
        body, _indent, body_offset, blank_finish = (
            self.state_machine.get_first_known_indented(match.end()))
        label = match.group(1)
        name = normalize_name(label)
        node = nodes.footnote('\n'.join(body))
        node.source = source
        node.line = line
        if name[0] == '#':
            # Auto-numbered; whatever follows the "#" is the autonumber label.
            name = name[1:]
            node['auto'] = 1
            if name:
                node['names'].append(name)
            self.document.note_autofootnote(node)
        elif name == '*':
            # Auto-symbol footnotes carry no name.
            name = ''
            node['auto'] = '*'
            self.document.note_symbol_footnote(node)
        else:
            # Manually numbered: the label is stored as given.
            node += nodes.label('', label)
            node['names'].append(name)
            self.document.note_footnote(node)
        if not name:
            self.document.set_id(node, node)
        else:
            self.document.note_explicit_target(node, node)
        if body:
            self.nested_parse(body, input_offset=body_offset, node=node)
        return [node], blank_finish
    1917 
    def citation(self, match):
        """
        Parse a citation and register it with the document.

        `match` is the citation construct match; group 1 is the label.
        Return a 2-tuple: a one-element list holding the citation node,
        and the "blank finish" flag of its indented block.
        """
        source, line = self.state_machine.get_source_and_line()
        body, _indent, body_offset, blank_finish = (
            self.state_machine.get_first_known_indented(match.end()))
        label = match.group(1)
        name = normalize_name(label)
        node = nodes.citation('\n'.join(body))
        node.source = source
        node.line = line
        node += nodes.label('', label)
        node['names'].append(name)
        self.document.note_citation(node)
        self.document.note_explicit_target(node, node)
        if body:
            self.nested_parse(body, input_offset=body_offset, node=node)
        return [node], blank_finish
    1934 
    def hyperlink_target(self, match):
        """
        Parse an explicit hyperlink target.

        `match` is the match of the explicit markup start and target
        indicator.  The target name may continue over several lines of
        the block, so lines are accumulated until the ``target`` pattern
        matches.

        Return a 2-tuple: a one-element list holding the result of
        `make_target`, and the "blank finish" flag.  Raise `MarkupError`
        if the block is exhausted before the pattern matches.
        """
        pattern = self.explicit.patterns.target
        lineno = self.state_machine.abs_line_number()
        (block, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(
                 match.end(), until_blank=True, strip_indent=False)
        # Text of the whole construct, handed on to `make_target`.
        blocktext = match.string[:match.end()] + '\n'.join(block)
        # From here on, work on the `escape2null`-processed lines.
        block = [escape2null(line) for line in block]
        escaped = block[0]
        blockindex = 0
        # Append one more line at a time until the target pattern matches.
        while True:
            targetmatch = pattern.match(escaped)
            if targetmatch:
                break
            blockindex += 1
            try:
                escaped += block[blockindex]
            except IndexError:
                raise MarkupError('malformed hyperlink target.')
        # Drop the lines before the one appended last; block[0] is now the
        # tail of `escaped`.
        del block[:blockindex]
        # The slice start is a negative index counted from the end of the
        # padded line, so only the text after the match is kept.  The
        # padding space keeps the index negative even when the match
        # consumes all of `escaped` (a start of 0 would keep the whole line).
        block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
        target = self.make_target(block, blocktext, lineno,
                                  targetmatch.group('name'))
        return [target], blank_finish
    1959 
    def make_target(self, block, block_text, lineno, target_name):
        """
        Build the node for a hyperlink target.

        The block is classified by `parse_target`.  For the 'refuri' and
        'refname' types a target node is created, registered through
        `add_target` and returned; for any other type the data returned
        by `parse_target` is passed back unchanged.
        """
        kind, payload = self.parse_target(block, block_text, lineno)
        if kind == 'refuri':
            node = nodes.target(block_text, '')
            self.add_target(target_name, payload, node, lineno)
            return node
        if kind == 'refname':
            node = nodes.target(block_text, '',
                                refname=normalize_name(payload))
            node.indirect_reference_name = payload
            self.add_target(target_name, '', node, lineno)
            self.document.note_indirect_target(node)
            return node
        return payload
    1974 
    def parse_target(self, block, block_text, lineno):
        """
        Determine the type of reference of a target.

        :Return: A 2-tuple, one of:

            - 'refname' and the indirect reference name, when the block
              ends with an underscore and `is_reference` accepts it
            - 'refuri' and the URI (whitespace removed, except for escaped
              whitespace, which becomes a single space), otherwise
        """
        if block and block[-1].strip().endswith('_'):
            # The block might be an indirect target.
            candidate = ' '.join(line.strip() for line in block)
            refname = self.is_reference(candidate)
            if refname:
                return 'refname', refname
        uri_parts = split_escaped_whitespace(' '.join(block))
        uri = ' '.join(''.join(unescape(part).split())
                       for part in uri_parts)
        return 'refuri', uri
    1994 
    def is_reference(self, reference):
        """
        Return the unescaped reference name if `reference` is a hyperlink
        reference, else None.

        `reference` is whitespace-normalized before it is matched against
        the ``reference`` pattern.
        """
        normalized = whitespace_normalize_name(reference)
        found = self.explicit.patterns.reference.match(normalized)
        if found:
            return unescape(found.group('simple') or found.group('phrase'))
        return None
    2001 
    def add_target(self, targetname, refuri, target, lineno):
        """
        Complete the `target` node and register it with the document.

        An empty `targetname` marks an anonymous target: `refuri` is
        stored as given and the node is noted as anonymous.  Otherwise the
        normalized name is recorded and a non-empty `refuri` is passed
        through the inliner's `adjust_uri`; `ApplicationError` is raised
        if that yields nothing.
        """
        target.line = lineno
        if not targetname:
            # Anonymous target.
            if refuri:
                target['refuri'] = refuri
            target['anonymous'] = 1
            self.document.note_anonymous_target(target)
            return
        name = normalize_name(unescape(targetname))
        target['names'].append(name)
        if refuri:
            uri = self.inliner.adjust_uri(refuri)
            if not uri:
                raise ApplicationError('problem with URI: %r' % refuri)
            target['refuri'] = uri
        self.document.note_explicit_target(target, self.parent)
    2019 
    def substitution_def(self, match):
        """
        Parse a substitution definition.

        `match` is the match of the explicit markup start and substitution
        indicator.  The substitution name may continue over several lines,
        so lines are accumulated until the ``substitution`` pattern matches.

        Return a 2-tuple: a one-element list holding either the
        substitution_definition node or a system message (missing, empty
        or illegal contents), and the "blank finish" flag.  Raise
        `MarkupError` if the block is exhausted before the pattern matches.
        """
        pattern = self.explicit.patterns.substitution
        src, srcline = self.state_machine.get_source_and_line()
        (block, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         strip_indent=False)
        # Text of the whole construct, used as the node's raw source and
        # quoted in system messages.
        blocktext = (match.string[:match.end()] + '\n'.join(block))
        # NOTE(review): presumably detaches `block` from its parent line
        # list so the edits below do not propagate -- confirm.
        block.disconnect()
        escaped = escape2null(block[0].rstrip())
        blockindex = 0
        # Join one more stripped line at a time (separated by a space)
        # until the substitution pattern matches.
        while True:
            subdefmatch = pattern.match(escaped)
            if subdefmatch:
                break
            blockindex += 1
            try:
                escaped = escaped + ' ' + escape2null(
                                              block[blockindex].strip())
            except IndexError:
                raise MarkupError('malformed substitution definition.')
        del block[:blockindex]          # strip out the substitution marker
        # Negative index, counted from the end of the padded line, of the
        # first character after the match.
        start = subdefmatch.end()-len(escaped)-1
        # Keep only the text after the match; ``-1`` drops the padding space.
        block[0] = (block[0].strip() + ' ')[start:-1]
        if not block[0]:
            # Nothing follows the name on this line; skip the line.
            del block[0]
            offset += 1
        # Remove trailing blank lines.
        while block and not block[-1].strip():
            block.pop()
        subname = subdefmatch.group('name')
        substitution_node = nodes.substitution_definition(blocktext)
        substitution_node.source = src
        substitution_node.line = srcline
        if not block:
            msg = self.reporter.warning(
                'Substitution definition "%s" missing contents.' % subname,
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            return [msg], blank_finish
        block[0] = block[0].strip()
        substitution_node['names'].append(
            nodes.whitespace_normalize_name(subname))
        # Parse the remaining text, starting in the 'SubstitutionDef' state.
        new_abs_offset, blank_finish = self.nested_list_parse(
              block, input_offset=offset, node=substitution_node,
              initial_state='SubstitutionDef', blank_finish=blank_finish)
        # Move every child that is neither inline nor text out of the
        # definition and into the parent.  The loop runs over a copy; `i`
        # indexes the live node and only advances past children that stay.
        i = 0
        for node in substitution_node[:]:
            if not (isinstance(node, nodes.Inline)
                    or isinstance(node, nodes.Text)):
                self.parent += substitution_node[i]
                del substitution_node[i]
            else:
                i += 1
        # Reject the definition at the first disallowed element.
        for node in substitution_node.findall(nodes.Element):
            if self.disallowed_inside_substitution_definitions(node):
                pformat = nodes.literal_block('', node.pformat().rstrip())
                msg = self.reporter.error(
                    'Substitution definition contains illegal element <%s>:'
                    % node.tagname,
                    pformat, nodes.literal_block(blocktext, blocktext),
                    source=src, line=srcline)
                return [msg], blank_finish
        if len(substitution_node) == 0:
            msg = self.reporter.warning(
                  'Substitution definition "%s" empty or invalid.' % subname,
                  nodes.literal_block(blocktext, blocktext),
                  source=src, line=srcline)
            return [msg], blank_finish
        self.document.note_substitution_def(
            substitution_node, subname, self.parent)
        return [substitution_node], blank_finish
    2090 
    def disallowed_inside_substitution_definitions(self, node):
        """
        Return True if `node` may not occur in a substitution definition.

        Disallowed are nodes with IDs, anonymous references and
        auto-numbered/auto-symbol footnote references.
        """
        if node['ids']:
            return True
        if isinstance(node, nodes.reference) and node.get('anonymous'):
            return True
        if isinstance(node, nodes.footnote_reference) and node.get('auto'):
            return True
        return False
    2098 
    def directive(self, match, **option_presets):
        """
        Look up and run the directive named in group 1 of `match`.

        Messages from the lookup are appended to the parent node.  An
        unknown directive name is handled by `unknown_directive`.

        Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
        """
        type_name = match.group(1)
        directive_class, messages = directives.directive(
            type_name, self.memo.language, self.document)
        self.parent += messages
        if not directive_class:
            return self.unknown_directive(type_name)
        return self.run_directive(
            directive_class, match, type_name, option_presets)
    2110 
    def run_directive(self, directive, match, type_name, option_presets):
        """
        Parse a directive block, then instantiate and run the directive.

        Parameters:

        - `directive`: The class implementing the directive.  A plain
          function or method is first wrapped with
          `convert_directive_function`.

        - `match`: The regular expression match object for the first line
          of the directive.

        - `type_name`: The directive name as written in the source text.

        - `option_presets`: A dictionary of default values for the
          directive options.

        A `MarkupError` raised while parsing the block and a
        `DirectiveError` raised by the directive are both turned into
        system messages.

        Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
        """
        if isinstance(directive, (FunctionType, MethodType)):
            from docutils.parsers.rst import convert_directive_function
            directive = convert_directive_function(directive)
        lineno = self.state_machine.abs_line_number()
        first_line_offset = self.state_machine.line_offset
        indented, _indent, line_offset, blank_finish = (
            self.state_machine.get_first_known_indented(match.end(),
                                                        strip_top=0))
        # Source text of the directive: every input line consumed so far.
        last_line_offset = self.state_machine.line_offset
        block_text = '\n'.join(
            self.state_machine.input_lines[
                first_line_offset:last_line_offset + 1])
        try:
            arguments, options, content, content_offset = (
                self.parse_directive_block(indented, line_offset,
                                           directive, option_presets))
        except MarkupError as detail:
            error = self.reporter.error(
                'Error in "%s" directive:\n%s.' % (type_name,
                                                   ' '.join(detail.args)),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error], blank_finish
        directive_instance = directive(
            type_name, arguments, options, content, lineno,
            content_offset, block_text, self, self.state_machine)
        try:
            result = directive_instance.run()
        except docutils.parsers.rst.DirectiveError as error:
            msg_node = self.reporter.system_message(error.level, error.msg,
                                                    line=lineno)
            msg_node += nodes.literal_block(block_text, block_text)
            result = [msg_node]
        assert isinstance(result, list), \
               'Directive "%s" must return a list of nodes.' % type_name
        for index, item in enumerate(result):
            assert isinstance(item, nodes.Node), \
                   ('Directive "%s" returned non-Node object (index %s): %r'
                    % (type_name, index, item))
        return (result,
                blank_finish or self.state_machine.is_next_line_blank())
    2170 
    def parse_directive_block(self, indented, line_offset, directive,
                              option_presets):
        """
        Split a directive block into arguments, options and content.

        `indented` holds the lines of the directive block and
        `line_offset` the offset of its first line.  The lines up to the
        first blank line form the argument/option block when the
        directive takes arguments or options; the rest is content.

        Return a 4-tuple ``(arguments, options, content, content_offset)``.
        Raise `MarkupError` if content is present but not permitted.
        """
        option_spec = directive.option_spec
        has_content = directive.has_content
        takes_arguments = (directive.required_arguments
                           or directive.optional_arguments)
        # Drop one leading blank line and all trailing blank lines.
        if indented and not indented[0].strip():
            indented.trim_start()
            line_offset += 1
        while indented and not indented[-1].strip():
            indented.trim_end()
        if indented and (takes_arguments or option_spec):
            # Index of the first blank line, or the block length if none.
            split_at = next((index for index, text in enumerate(indented)
                             if not text.strip()), len(indented))
            arg_block = indented[:split_at]
            content = indented[split_at + 1:]
            content_offset = line_offset + split_at + 1
        else:
            arg_block = []
            content = indented
            content_offset = line_offset
        options = {}
        if option_spec:
            options, arg_block = self.parse_directive_options(
                option_presets, option_spec, arg_block)
        if arg_block and not takes_arguments:
            # Lines left over before the options belong to the content
            # when the directive accepts no arguments.
            content = arg_block + indented[split_at:]
            content_offset = line_offset
            arg_block = []
        while content and not content[0].strip():
            content.trim_start()
            content_offset += 1
        if takes_arguments:
            arguments = self.parse_directive_arguments(directive, arg_block)
        else:
            arguments = []
        if content and not has_content:
            raise MarkupError('no content permitted')
        return arguments, options, content, content_offset
    2216 
    def parse_directive_options(self, option_presets, option_spec, arg_block):
        """
        Separate the option field list from the argument lines.

        The options start at the first line of `arg_block` matching the
        'field_marker' pattern.  Parsed options are merged over a copy of
        `option_presets`.

        Return a 2-tuple: the options dictionary and the remaining
        argument lines.  Raise `MarkupError` if the options do not parse.
        """
        options = option_presets.copy()
        split_at = next(
            (index for index, text in enumerate(arg_block)
             if re.match(Body.patterns['field_marker'], text)),
            None)
        if split_at is None:
            # No field marker: everything is argument text.
            return options, arg_block
        opt_block = arg_block[split_at:]
        arg_block = arg_block[:split_at]
        if opt_block:
            success, data = self.parse_extension_options(option_spec,
                                                         opt_block)
            if not success:             # data is an error string
                raise MarkupError(data)
            options.update(data)        # data is a dict of options
        return options, arg_block
    2234 
    def parse_directive_arguments(self, directive, arg_block):
        """
        Split the argument lines into a list of directive arguments.

        Arguments are separated by whitespace.  If more arguments are
        supplied than allowed and the directive sets
        `final_argument_whitespace`, the surplus text is kept as part of
        the last argument.

        Raise `MarkupError` if too few or too many arguments are supplied.
        """
        required = directive.required_arguments
        allowed = required + directive.optional_arguments
        arg_text = '\n'.join(arg_block)
        arguments = arg_text.split()
        supplied = len(arguments)
        if supplied < required:
            raise MarkupError('%s argument(s) required, %s supplied'
                              % (required, supplied))
        if supplied > allowed:
            if not directive.final_argument_whitespace:
                raise MarkupError(
                    'maximum %s argument(s) allowed, %s supplied'
                    % (allowed, supplied))
            arguments = arg_text.split(None, allowed - 1)
        return arguments
    2251 
    def parse_extension_options(self, option_spec, datalines):
        """
        Parse `datalines` as a field list of extension options and
        convert them according to `option_spec`.

        :Parameters:
            - `option_spec`: a mapping of option name to conversion
              function, which should raise an exception on bad input.
            - `datalines`: a list of input strings.

        :Return:
            - Success value, 1 or 0.
            - An option dictionary on success, an error string on failure.
        """
        field_list = nodes.field_list()
        end_offset, blank_finish = self.nested_list_parse(
            datalines, 0, field_list, initial_state='ExtensionOptions',
            blank_finish=True)
        if end_offset != len(datalines):
            # The nested parse stopped before the end of the block.
            return 0, 'invalid option block'
        try:
            options = utils.extract_extension_options(field_list,
                                                      option_spec)
        except KeyError as detail:
            return 0, 'unknown option: "%s"' % detail.args[0]
        except (ValueError, TypeError) as detail:
            return 0, 'invalid option value: %s' % ' '.join(detail.args)
        except utils.ExtensionOptionError as detail:
            return 0, 'invalid option data: %s' % ' '.join(detail.args)
        if not blank_finish:
            return 0, 'option data incompletely parsed'
        return 1, options
    2284 
    def unknown_directive(self, type_name):
        """
        Report an unknown directive.

        The directive block is consumed and quoted in an error message.
        Return a 2-tuple: a one-element list holding the system message,
        and the "blank finish" flag.
        """
        lineno = self.state_machine.abs_line_number()
        block, _indent, _offset, blank_finish = (
            self.state_machine.get_first_known_indented(0,
                                                        strip_indent=False))
        source_text = '\n'.join(block)
        error = self.reporter.error(
            'Unknown directive type "%s".' % type_name,
            nodes.literal_block(source_text, source_text),
            line=lineno)
        return [error], blank_finish
    2294 
    def comment(self, match):
        """
        Parse a comment.

        Two one-line forms followed by a blank line are special: an empty
        comment yields an empty comment node, and an "end of inclusion"
        marker pops the document's include log and yields no node.

        Return a 2-tuple: list of nodes, and a "blank finish" boolean.
        """
        if self.state_machine.is_next_line_blank():
            remainder = match.string[match.end():]
            if not remainder.strip():
                # Empty comment: "A tiny but practical wart."
                return [nodes.comment()], True
            if remainder.startswith('end of inclusion from "'):
                # cf. parsers.rst.directives.misc.Include
                self.document.include_log.pop()
                return [], True
        body, _indent, _offset, blank_finish = (
            self.state_machine.get_first_known_indented(match.end()))
        # Trailing blank lines are not part of the comment text.
        while body and not body[-1].strip():
            body.trim_end()
        comment_text = '\n'.join(body)
        return [nodes.comment(comment_text, comment_text)], blank_finish
    2310 
    # Ordered (parser function, compiled pattern) pairs, tried in sequence
    # by `explicit_construct`: the first pattern that matches the line
    # selects the parser.  The entries are the plain functions defined
    # above, not bound methods, so `explicit_construct` passes `self`
    # explicitly.  A line matching none of the patterns is parsed as a
    # comment.
    explicit.constructs = [
          (footnote,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      \[
                      (                 # footnote label:
                          [0-9]+          # manually numbered footnote
                        |               # *OR*
                          \#              # anonymous auto-numbered footnote
                        |               # *OR*
                          \#%s            # auto-number ed?) footnote label
                        |               # *OR*
                          \*              # auto-symbol footnote
                      )
                      \]
                      ([ ]+|$)          # whitespace or end of line
                      """ % Inliner.simplename, re.VERBOSE)),
          (citation,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      \[(%s)\]          # citation label
                      ([ ]+|$)          # whitespace or end of line
                      """ % Inliner.simplename, re.VERBOSE)),
          (hyperlink_target,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      _                 # target indicator
                      (?![ ]|$)         # first char. not space or EOL
                      """, re.VERBOSE)),
          (substitution_def,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      \|                # substitution indicator
                      (?![ ]|$)         # first char. not space or EOL
                      """, re.VERBOSE)),
          (directive,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      (%s)              # directive name
                      [ ]?              # optional space
                      ::                # directive delimiter
                      ([ ]+|$)          # whitespace or end of line
                      """ % Inliner.simplename, re.VERBOSE))]
    2354 
    def explicit_markup(self, match, context, next_state):
        """
        Transition method for explicit markup: footnotes, hyperlink
        targets, directives and comments.

        The parsed nodes are appended to the parent, then any following
        explicit constructs are parsed by `explicit_list`.
        """
        parsed_nodes, blank_finish = self.explicit_construct(match)
        self.parent += parsed_nodes
        self.explicit_list(blank_finish)
        return [], next_state, []
    2361 
    def explicit_construct(self, match):
        """
        Identify the explicit construct on the matched line and parse it.

        The parser of the first matching entry in `explicit.constructs`
        is used.  If it raises `MarkupError`, or if no entry matches, the
        line is parsed as a comment; a warning for the `MarkupError` is
        appended to the returned nodes.

        Return a 2-tuple: list of nodes, and a "blank finish" boolean.
        """
        warnings = []
        for parser, pattern in self.explicit.constructs:
            found = pattern.match(match.string)
            if not found:
                continue
            try:
                # The entries are plain functions: pass `self` explicitly.
                return parser(self, found)
            except MarkupError as error:
                lineno = self.state_machine.abs_line_number()
                text = ' '.join(error.args)
                warnings.append(self.reporter.warning(text, line=lineno))
                break
        nodelist, blank_finish = self.comment(match)
        return nodelist + warnings, blank_finish
    2377 
    def explicit_list(self, blank_finish):
        """
        Parse a series of explicit markup constructs (including anonymous
        hyperlink targets) with a nested state machine.

        Parsing starts on the line after the current one, in the
        'Explicit' state.  A warning is appended to the parent if the
        series does not end with a blank line.
        """
        next_line = self.state_machine.line_offset + 1
        remaining_lines = self.state_machine.input_lines[next_line:]
        newline_offset, blank_finish = self.nested_list_parse(
            remaining_lines,
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=self.parent,
            initial_state='Explicit',
            blank_finish=blank_finish,
            match_titles=self.state_machine.match_titles)
        self.goto_line(newline_offset)
        if blank_finish:
            return
        self.parent += self.unindent_warning('Explicit markup')
    2393 
    def anonymous(self, match, context, next_state):
        """
        Transition method for anonymous hyperlink targets.

        The target is appended to the parent, then any following explicit
        constructs are parsed by `explicit_list`.
        """
        parsed_nodes, blank_finish = self.anonymous_target(match)
        self.parent += parsed_nodes
        self.explicit_list(blank_finish)
        return [], next_state, []
    2400 
    def anonymous_target(self, match):
        """
        Parse an anonymous hyperlink target.

        Return a 2-tuple: a one-element list holding the result of
        `make_target` (called with an empty target name), and the
        "blank finish" flag.
        """
        lineno = self.state_machine.abs_line_number()
        raw_lines, _indent, _offset, blank_finish = (
            self.state_machine.get_first_known_indented(match.end(),
                                                        until_blank=True))
        marker_text = match.string[:match.end()]
        blocktext = marker_text + '\n'.join(raw_lines)
        escaped_lines = [escape2null(text) for text in raw_lines]
        target = self.make_target(escaped_lines, blocktext, lineno, '')
        return [target], blank_finish
    2410 
    def line(self, match, context, next_state):
        """
        Transition method for a possible section title overline or
        transition marker.

        Where titles may be matched, the line is handed to the 'Line'
        state.  Otherwise a bare "::" or a line shorter than four
        characters is re-processed as text, and anything longer is
        reported as a severe error.
        """
        if self.state_machine.match_titles:
            return [match.string], 'Line', []
        marker = match.string.strip()
        if marker == '::':
            raise statemachine.TransitionCorrection('text')
        if len(marker) < 4:
            notice = self.reporter.info(
                'Unexpected possible title overline or transition.\n'
                "Treating it as ordinary text because it's so short.",
                line=self.state_machine.abs_line_number())
            self.parent += notice
            raise statemachine.TransitionCorrection('text')
        blocktext = self.state_machine.line
        failure = self.reporter.severe(
            'Unexpected section title or transition.',
            nodes.literal_block(blocktext, blocktext),
            line=self.state_machine.abs_line_number())
        self.parent += failure
        return [], next_state, []
    2432 
    def text(self, match, context, next_state):
        """
        Transition method for ordinary text (titles, definition lists,
        paragraphs): pass the line on to the 'Text' state.
        """
        new_context = [match.string]
        return new_context, 'Text', []
    2436 
    2437 
    2438class RFC2822Body(Body): 
    2439 
    2440    """ 
    2441    RFC2822 headers are only valid as the first constructs in documents.  As 
    2442    soon as anything else appears, the `Body` state should take over. 
    2443    """ 
    2444 
    2445    patterns = Body.patterns.copy()     # can't modify the original 
    2446    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)' 
    2447    initial_transitions = [(name, 'Body') 
    2448                           for name in Body.initial_transitions] 
    2449    initial_transitions.insert(-1, ('rfc2822', 'Body'))  # just before 'text' 
    2450 
    2451    def rfc2822(self, match, context, next_state): 
    2452        """RFC2822-style field list item.""" 
    2453        fieldlist = nodes.field_list(classes=['rfc2822']) 
    2454        self.parent += fieldlist 
    2455        field, blank_finish = self.rfc2822_field(match) 
    2456        fieldlist += field 
    2457        offset = self.state_machine.line_offset + 1  # next line 
    2458        newline_offset, blank_finish = self.nested_list_parse( 
    2459              self.state_machine.input_lines[offset:], 
    2460              input_offset=self.state_machine.abs_line_offset() + 1, 
    2461              node=fieldlist, initial_state='RFC2822List', 
    2462              blank_finish=blank_finish) 
    2463        self.goto_line(newline_offset) 
    2464        if not blank_finish: 
    2465            self.parent += self.unindent_warning( 
    2466                  'RFC2822-style field list') 
    2467        return [], next_state, [] 
    2468 
    2469    def rfc2822_field(self, match): 
    2470        name = match.string[:match.string.find(':')] 
    2471        (indented, indent, line_offset, blank_finish 
    2472         ) = self.state_machine.get_first_known_indented(match.end(), 
    2473                                                         until_blank=True) 
    2474        fieldnode = nodes.field() 
    2475        fieldnode += nodes.field_name(name, name) 
    2476        fieldbody = nodes.field_body('\n'.join(indented)) 
    2477        fieldnode += fieldbody 
    2478        if indented: 
    2479            self.nested_parse(indented, input_offset=line_offset, 
    2480                              node=fieldbody) 
    2481        return fieldnode, blank_finish 
    2482 
    2483 
    2484class SpecializedBody(Body): 
    2485 
    2486    """ 
    2487    Superclass for second and subsequent compound element members.  Compound 
    2488    elements are lists and list-like constructs. 
    2489 
    2490    All transition methods are disabled (redefined as `invalid_input`). 
    2491    Override individual methods in subclasses to re-enable. 
    2492 
    2493    For example, once an initial bullet list item, say, is recognized, the 
    2494    `BulletList` subclass takes over, with a "bullet_list" node as its 
    2495    container.  Upon encountering the initial bullet list item, `Body.bullet` 
    2496    calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which 
    2497    starts up a nested parsing session with `BulletList` as the initial state. 
    2498    Only the ``bullet`` transition method is enabled in `BulletList`; as long 
    2499    as only bullet list items are encountered, they are parsed and inserted 
    2500    into the container.  The first construct which is *not* a bullet list item 
    2501    triggers the `invalid_input` method, which ends the nested parse and 
    2502    closes the container.  `BulletList` needs to recognize input that is 
    2503    invalid in the context of a bullet list, which means everything *other 
    2504    than* bullet list items, so it inherits the transition list created in 
    2505    `Body`. 
    2506    """ 
    2507 
    2508    def invalid_input(self, match=None, context=None, next_state=None): 
    2509        """Not a compound element member. Abort this state machine.""" 
    2510        self.state_machine.previous_line()  # back up so parent SM can reassess 
    2511        raise EOFError 
    2512 
    2513    indent = invalid_input 
    2514    bullet = invalid_input 
    2515    enumerator = invalid_input 
    2516    field_marker = invalid_input 
    2517    option_marker = invalid_input 
    2518    doctest = invalid_input 
    2519    line_block = invalid_input 
    2520    grid_table_top = invalid_input 
    2521    simple_table_top = invalid_input 
    2522    explicit_markup = invalid_input 
    2523    anonymous = invalid_input 
    2524    line = invalid_input 
    2525    text = invalid_input 
    2526 
    2527 
    2528class BulletList(SpecializedBody): 
    2529 
    2530    """Second and subsequent bullet_list list_items.""" 
    2531 
    2532    def bullet(self, match, context, next_state): 
    2533        """Bullet list item.""" 
    2534        if match.string[0] != self.parent['bullet']: 
    2535            # different bullet: new list 
    2536            self.invalid_input() 
    2537        listitem, blank_finish = self.list_item(match.end()) 
    2538        self.parent += listitem 
    2539        self.blank_finish = blank_finish 
    2540        return [], next_state, [] 
    2541 
    2542 
    2543class DefinitionList(SpecializedBody): 
    2544 
    2545    """Second and subsequent definition_list_items.""" 
    2546 
    2547    def text(self, match, context, next_state): 
    2548        """Definition lists.""" 
    2549        return [match.string], 'Definition', [] 
    2550 
    2551 
    2552class EnumeratedList(SpecializedBody): 
    2553 
    2554    """Second and subsequent enumerated_list list_items.""" 
    2555 
    2556    def enumerator(self, match, context, next_state): 
    2557        """Enumerated list item.""" 
    2558        format, sequence, text, ordinal = self.parse_enumerator( 
    2559              match, self.parent['enumtype']) 
    2560        if (format != self.format 
    2561            or (sequence != '#' and (sequence != self.parent['enumtype'] 
    2562                                     or self.auto 
    2563                                     or ordinal != (self.lastordinal + 1))) 
    2564            or not self.is_enumerated_list_item(ordinal, sequence, format)): 
    2565            # different enumeration: new list 
    2566            self.invalid_input() 
    2567        if sequence == '#': 
    2568            self.auto = 1 
    2569        listitem, blank_finish = self.list_item(match.end()) 
    2570        self.parent += listitem 
    2571        self.blank_finish = blank_finish 
    2572        self.lastordinal = ordinal 
    2573        return [], next_state, [] 
    2574 
    2575 
    2576class FieldList(SpecializedBody): 
    2577 
    2578    """Second and subsequent field_list fields.""" 
    2579 
    2580    def field_marker(self, match, context, next_state): 
    2581        """Field list field.""" 
    2582        field, blank_finish = self.field(match) 
    2583        self.parent += field 
    2584        self.blank_finish = blank_finish 
    2585        return [], next_state, [] 
    2586 
    2587 
    2588class OptionList(SpecializedBody): 
    2589 
    2590    """Second and subsequent option_list option_list_items.""" 
    2591 
    2592    def option_marker(self, match, context, next_state): 
    2593        """Option list item.""" 
    2594        try: 
    2595            option_list_item, blank_finish = self.option_list_item(match) 
    2596        except MarkupError: 
    2597            self.invalid_input() 
    2598        self.parent += option_list_item 
    2599        self.blank_finish = blank_finish 
    2600        return [], next_state, [] 
    2601 
    2602 
    2603class RFC2822List(SpecializedBody, RFC2822Body): 
    2604 
    2605    """Second and subsequent RFC2822-style field_list fields.""" 
    2606 
    2607    patterns = RFC2822Body.patterns 
    2608    initial_transitions = RFC2822Body.initial_transitions 
    2609 
    2610    def rfc2822(self, match, context, next_state): 
    2611        """RFC2822-style field list item.""" 
    2612        field, blank_finish = self.rfc2822_field(match) 
    2613        self.parent += field 
    2614        self.blank_finish = blank_finish 
    2615        return [], 'RFC2822List', [] 
    2616 
    2617    blank = SpecializedBody.invalid_input 
    2618 
    2619 
    2620class ExtensionOptions(FieldList): 
    2621 
    2622    """ 
    2623    Parse field_list fields for extension options. 
    2624 
    2625    No nested parsing is done (including inline markup parsing). 
    2626    """ 
    2627 
    2628    def parse_field_body(self, indented, offset, node): 
    2629        """Override `Body.parse_field_body` for simpler parsing.""" 
    2630        lines = [] 
    2631        for line in list(indented) + ['']: 
    2632            if line.strip(): 
    2633                lines.append(line) 
    2634            elif lines: 
    2635                text = '\n'.join(lines) 
    2636                node += nodes.paragraph(text, text) 
    2637                lines = [] 
    2638 
    2639 
    2640class LineBlock(SpecializedBody): 
    2641 
    2642    """Second and subsequent lines of a line_block.""" 
    2643 
    2644    blank = SpecializedBody.invalid_input 
    2645 
    2646    def line_block(self, match, context, next_state): 
    2647        """New line of line block.""" 
    2648        lineno = self.state_machine.abs_line_number() 
    2649        line, messages, blank_finish = self.line_block_line(match, lineno) 
    2650        self.parent += line 
    2651        self.parent.parent += messages 
    2652        self.blank_finish = blank_finish 
    2653        return [], next_state, [] 
    2654 
    2655 
    2656class Explicit(SpecializedBody): 
    2657 
    2658    """Second and subsequent explicit markup construct.""" 
    2659 
    2660    def explicit_markup(self, match, context, next_state): 
    2661        """Footnotes, hyperlink targets, directives, comments.""" 
    2662        nodelist, blank_finish = self.explicit_construct(match) 
    2663        self.parent += nodelist 
    2664        self.blank_finish = blank_finish 
    2665        return [], next_state, [] 
    2666 
    2667    def anonymous(self, match, context, next_state): 
    2668        """Anonymous hyperlink targets.""" 
    2669        nodelist, blank_finish = self.anonymous_target(match) 
    2670        self.parent += nodelist 
    2671        self.blank_finish = blank_finish 
    2672        return [], next_state, [] 
    2673 
    2674    blank = SpecializedBody.invalid_input 
    2675 
    2676 
    2677class SubstitutionDef(Body): 
    2678 
    2679    """ 
    2680    Parser for the contents of a substitution_definition element. 
    2681    """ 
    2682 
    2683    patterns = { 
    2684          'embedded_directive': re.compile(r'(%s)::( +|$)' 
    2685                                           % Inliner.simplename), 
    2686          'text': r''} 
    2687    initial_transitions = ['embedded_directive', 'text'] 
    2688 
    2689    def embedded_directive(self, match, context, next_state): 
    2690        nodelist, blank_finish = self.directive(match, 
    2691                                                alt=self.parent['names'][0]) 
    2692        self.parent += nodelist 
    2693        if not self.state_machine.at_eof(): 
    2694            self.blank_finish = blank_finish 
    2695        raise EOFError 
    2696 
    2697    def text(self, match, context, next_state): 
    2698        if not self.state_machine.at_eof(): 
    2699            self.blank_finish = self.state_machine.is_next_line_blank() 
    2700        raise EOFError 
    2701 
    2702 
    2703class Text(RSTState): 
    2704 
    2705    """ 
    2706    Classifier of second line of a text block. 
    2707 
    2708    Could be a paragraph, a definition list item, or a title. 
    2709    """ 
    2710 
    2711    patterns = {'underline': Body.patterns['line'], 
    2712                'text': r''} 
    2713    initial_transitions = [('underline', 'Body'), ('text', 'Body')] 
    2714 
    2715    def blank(self, match, context, next_state): 
    2716        """End of paragraph.""" 
    2717        # NOTE: self.paragraph returns [node, system_message(s)], literalnext 
    2718        paragraph, literalnext = self.paragraph( 
    2719              context, self.state_machine.abs_line_number() - 1) 
    2720        self.parent += paragraph 
    2721        if literalnext: 
    2722            self.parent += self.literal_block() 
    2723        return [], 'Body', [] 
    2724 
    2725    def eof(self, context): 
    2726        if context: 
    2727            self.blank(None, context, None) 
    2728        return [] 
    2729 
    2730    def indent(self, match, context, next_state): 
    2731        """Definition list item.""" 
    2732        dl = nodes.definition_list() 
    2733        # the definition list starts on the line before the indent: 
    2734        lineno = self.state_machine.abs_line_number() - 1 
    2735        dl.source, dl.line = self.state_machine.get_source_and_line(lineno) 
    2736        dl_item, blank_finish = self.definition_list_item(context) 
    2737        dl += dl_item 
    2738        self.parent += dl 
    2739        offset = self.state_machine.line_offset + 1   # next line 
    2740        newline_offset, blank_finish = self.nested_list_parse( 
    2741              self.state_machine.input_lines[offset:], 
    2742              input_offset=self.state_machine.abs_line_offset() + 1, 
    2743              node=dl, initial_state='DefinitionList', 
    2744              blank_finish=blank_finish, blank_finish_state='Definition') 
    2745        self.goto_line(newline_offset) 
    2746        if not blank_finish: 
    2747            self.parent += self.unindent_warning('Definition list') 
    2748        return [], 'Body', [] 
    2749 
    2750    def underline(self, match, context, next_state): 
    2751        """Section title.""" 
    2752        lineno = self.state_machine.abs_line_number() 
    2753        title = context[0].rstrip() 
    2754        underline = match.string.rstrip() 
    2755        source = title + '\n' + underline 
    2756        messages = [] 
    2757        if column_width(title) > len(underline): 
    2758            if len(underline) < 4: 
    2759                if self.state_machine.match_titles: 
    2760                    msg = self.reporter.info( 
    2761                        'Possible title underline, too short for the title.\n' 
    2762                        "Treating it as ordinary text because it's so short.", 
    2763                        line=lineno) 
    2764                    self.parent += msg 
    2765                raise statemachine.TransitionCorrection('text') 
    2766            else: 
    2767                blocktext = context[0] + '\n' + self.state_machine.line 
    2768                msg = self.reporter.warning( 
    2769                    'Title underline too short.', 
    2770                    nodes.literal_block(blocktext, blocktext), 
    2771                    line=lineno) 
    2772                messages.append(msg) 
    2773        if not self.state_machine.match_titles: 
    2774            blocktext = context[0] + '\n' + self.state_machine.line 
    2775            # We need get_source_and_line() here to report correctly 
    2776            src, srcline = self.state_machine.get_source_and_line() 
    2777            # TODO: why is abs_line_number() == srcline+1 
    2778            # if the error is in a table (try with test_tables.py)? 
    2779            # print("get_source_and_line", srcline) 
    2780            # print("abs_line_number", self.state_machine.abs_line_number()) 
    2781            msg = self.reporter.severe( 
    2782                'Unexpected section title.', 
    2783                nodes.literal_block(blocktext, blocktext), 
    2784                source=src, line=srcline) 
    2785            self.parent += messages 
    2786            self.parent += msg 
    2787            return [], next_state, [] 
    2788        style = underline[0] 
    2789        context[:] = [] 
    2790        self.section(title, source, style, lineno - 1, messages) 
    2791        return [], next_state, [] 
    2792 
    2793    def text(self, match, context, next_state): 
    2794        """Paragraph.""" 
    2795        startline = self.state_machine.abs_line_number() - 1 
    2796        msg = None 
    2797        try: 
    2798            block = self.state_machine.get_text_block(flush_left=True) 
    2799        except statemachine.UnexpectedIndentationError as err: 
    2800            block, src, srcline = err.args 
    2801            msg = self.reporter.error('Unexpected indentation.', 
    2802                                      source=src, line=srcline) 
    2803        lines = context + list(block) 
    2804        paragraph, literalnext = self.paragraph(lines, startline) 
    2805        self.parent += paragraph 
    2806        self.parent += msg 
    2807        if literalnext: 
    2808            try: 
    2809                self.state_machine.next_line() 
    2810            except EOFError: 
    2811                pass 
    2812            self.parent += self.literal_block() 
    2813        return [], next_state, [] 
    2814 
    2815    def literal_block(self): 
    2816        """Return a list of nodes.""" 
    2817        (indented, indent, offset, blank_finish 
    2818         ) = self.state_machine.get_indented() 
    2819        while indented and not indented[-1].strip(): 
    2820            indented.trim_end() 
    2821        if not indented: 
    2822            return self.quoted_literal_block() 
    2823        data = '\n'.join(indented) 
    2824        literal_block = nodes.literal_block(data, data) 
    2825        (literal_block.source, 
    2826         literal_block.line) = self.state_machine.get_source_and_line(offset+1) 
    2827        nodelist = [literal_block] 
    2828        if not blank_finish: 
    2829            nodelist.append(self.unindent_warning('Literal block')) 
    2830        return nodelist 
    2831 
    2832    def quoted_literal_block(self): 
    2833        abs_line_offset = self.state_machine.abs_line_offset() 
    2834        offset = self.state_machine.line_offset 
    2835        parent_node = nodes.Element() 
    2836        new_abs_offset = self.nested_parse( 
    2837            self.state_machine.input_lines[offset:], 
    2838            input_offset=abs_line_offset, node=parent_node, match_titles=False, 
    2839            state_machine_kwargs={'state_classes': (QuotedLiteralBlock,), 
    2840                                  'initial_state': 'QuotedLiteralBlock'}) 
    2841        self.goto_line(new_abs_offset) 
    2842        return parent_node.children 
    2843 
    2844    def definition_list_item(self, termline): 
    2845        # the parser is already on the second (indented) line: 
    2846        dd_lineno = self.state_machine.abs_line_number() 
    2847        dt_lineno = dd_lineno - 1 
    2848        (indented, indent, line_offset, blank_finish 
    2849         ) = self.state_machine.get_indented() 
    2850        dl_item = nodes.definition_list_item( 
    2851                      '\n'.join(termline + list(indented))) 
    2852        (dl_item.source, 
    2853         dl_item.line) = self.state_machine.get_source_and_line(dt_lineno) 
    2854        dt_nodes, messages = self.term(termline, dt_lineno) 
    2855        dl_item += dt_nodes 
    2856        dd = nodes.definition('', *messages) 
    2857        dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno) 
    2858        dl_item += dd 
    2859        if termline[0][-2:] == '::': 
    2860            dd += self.reporter.info( 
    2861                  'Blank line missing before literal block (after the "::")? ' 
    2862                  'Interpreted as a definition list item.', 
    2863                  line=dd_lineno) 
    2864        # TODO: drop a definition if it is an empty comment to allow 
    2865        #       definition list items with several terms? 
    2866        #       https://sourceforge.net/p/docutils/feature-requests/60/ 
    2867        self.nested_parse(indented, input_offset=line_offset, node=dd) 
    2868        return dl_item, blank_finish 
    2869 
    2870    classifier_delimiter = re.compile(' +: +') 
    2871 
    2872    def term(self, lines, lineno): 
    2873        """Return a definition_list's term and optional classifiers.""" 
    2874        assert len(lines) == 1 
    2875        text_nodes, messages = self.inline_text(lines[0], lineno) 
    2876        dt = nodes.term(lines[0]) 
    2877        dt.source, dt.line = self.state_machine.get_source_and_line(lineno) 
    2878        node_list = [dt] 
    2879        for i in range(len(text_nodes)): 
    2880            node = text_nodes[i] 
    2881            if isinstance(node, nodes.Text): 
    2882                parts = self.classifier_delimiter.split(node) 
    2883                if len(parts) == 1: 
    2884                    node_list[-1] += node 
    2885                else: 
    2886                    text = parts[0].rstrip() 
    2887                    textnode = nodes.Text(text) 
    2888                    node_list[-1] += textnode 
    2889                    for part in parts[1:]: 
    2890                        node_list.append( 
    2891                            nodes.classifier(unescape(part, True), part)) 
    2892            else: 
    2893                node_list[-1] += node 
    2894        return node_list, messages 
    2895 
    2896 
    2897class SpecializedText(Text): 
    2898 
    2899    """ 
    2900    Superclass for second and subsequent lines of Text-variants. 
    2901 
    2902    All transition methods are disabled. Override individual methods in 
    2903    subclasses to re-enable. 
    2904    """ 
    2905 
    2906    def eof(self, context): 
    2907        """Incomplete construct.""" 
    2908        return [] 
    2909 
    2910    def invalid_input(self, match=None, context=None, next_state=None): 
    2911        """Not a compound element member. Abort this state machine.""" 
    2912        raise EOFError 
    2913 
    2914    blank = invalid_input 
    2915    indent = invalid_input 
    2916    underline = invalid_input 
    2917    text = invalid_input 
    2918 
    2919 
    2920class Definition(SpecializedText): 
    2921 
    2922    """Second line of potential definition_list_item.""" 
    2923 
    2924    def eof(self, context): 
    2925        """Not a definition.""" 
    2926        self.state_machine.previous_line(2)  # so parent SM can reassess 
    2927        return [] 
    2928 
    2929    def indent(self, match, context, next_state): 
    2930        """Definition list item.""" 
    2931        dl_item, blank_finish = self.definition_list_item(context) 
    2932        self.parent += dl_item 
    2933        self.blank_finish = blank_finish 
    2934        return [], 'DefinitionList', [] 
    2935 
    2936 
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.

    The first line (the punctuation line that may be an overline or a
    transition marker) is available as ``context[0]``.  Lines shorter than
    4 characters are handed back to 'Body' to be re-parsed as ordinary
    text (see `state_correction`).
    """

    eofcheck = 1                        # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            # Reset the flag; the short-marker check is skipped this time.
            self.memo.section_bubble_up_kludge = False
        elif len(marker) < 4:
            # Too short for a transition: re-parse as text (raises).
            self.state_correction(context)
        if self.eofcheck:               # ignore EOFError with sections
            src, srcline = self.state_machine.get_source_and_line()
            # lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.source = src
            transition.line = srcline - 1
            # transition.line = lineno
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            # Too short for a transition: re-parse as text (raises).
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        # The marker is on the line before the blank line.
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """
        Potential over- & underlined title.

        ``context[0]`` is the overline, the matched line the title; the
        next input line is read as underline.  A missing, non-matching or
        mismatching underline leads to a severe system message.  An
        overline shorter than the title only causes a warning.  Whenever a
        problem is found and the overline has fewer than 4 characters, the
        lines are re-parsed as ordinary text instead (`short_overline`).
        """
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            # The input ends right after the title line.
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            # The third line is not a valid underline at all.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                      'Title overline & underline mismatch.',
                      nodes.literal_block(source, source),
                      line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                # Not fatal: the warning is passed on to `section`.
                msg = self.reporter.warning(
                      'Title overline too short.',
                      nodes.literal_block(source, source),
                      line=lineno)
                messages.append(msg)
        style = (overline[0], underline[0])
        self.eofcheck = 0               # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text                       # indented title

    def underline(self, match, context, next_state):
        """
        A second punctuation line directly after the first one.

        This is neither a valid section title nor a transition: report an
        error (or re-parse as text if the first line is shorter than
        4 characters).
        """
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
              'Invalid section title or transition marker.',
              nodes.literal_block(blocktext, blocktext),
              line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        """
        Add an info message and re-parse a too short overline as text.

        Does not return: `state_correction` raises.  `blocktext` is not
        used.  `lines` is the number of lines to back up.
        """
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """
        Back up `lines` lines, clear `context` and restart in 'Body'.

        Always raises `statemachine.StateCorrection` for the 'text'
        transition of the 'Body' state.
        """
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
    3065 
    3066 
    3067class QuotedLiteralBlock(RSTState): 
    3068 
    3069    """ 
    3070    Nested parse handler for quoted (unindented) literal blocks. 
    3071 
    3072    Special-purpose.  Not for inclusion in `state_classes`. 
    3073    """ 
    3074 
    3075    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats, 
    3076                'text': r''} 
    3077    initial_transitions = ('initial_quoted', 'text') 
    3078 
    3079    def __init__(self, state_machine, debug=False): 
    3080        RSTState.__init__(self, state_machine, debug) 
    3081        self.messages = [] 
    3082        self.initial_lineno = None 
    3083 
    3084    def blank(self, match, context, next_state): 
    3085        if context: 
    3086            raise EOFError 
    3087        else: 
    3088            return context, next_state, [] 
    3089 
    3090    def eof(self, context): 
    3091        if context: 
    3092            src, srcline = self.state_machine.get_source_and_line( 
    3093                                                        self.initial_lineno) 
    3094            text = '\n'.join(context) 
    3095            literal_block = nodes.literal_block(text, text) 
    3096            literal_block.source = src 
    3097            literal_block.line = srcline 
    3098            self.parent += literal_block 
    3099        else: 
    3100            self.parent += self.reporter.warning( 
    3101                'Literal block expected; none found.', 
    3102                line=self.state_machine.abs_line_number() 
    3103                )  # src not available, statemachine.input_lines is empty 
    3104            self.state_machine.previous_line() 
    3105        self.parent += self.messages 
    3106        return [] 
    3107 
    3108    def indent(self, match, context, next_state): 
    3109        assert context, ('QuotedLiteralBlock.indent: context should not ' 
    3110                         'be empty!') 
    3111        self.messages.append( 
    3112            self.reporter.error('Unexpected indentation.', 
    3113                                line=self.state_machine.abs_line_number())) 
    3114        self.state_machine.previous_line() 
    3115        raise EOFError 
    3116 
    3117    def initial_quoted(self, match, context, next_state): 
    3118        """Match arbitrary quote character on the first line only.""" 
    3119        self.remove_transition('initial_quoted') 
    3120        quote = match.string[0] 
    3121        pattern = re.compile(re.escape(quote)) 
    3122        # New transition matches consistent quotes only: 
    3123        self.add_transition('quoted', 
    3124                            (pattern, self.quoted, self.__class__.__name__)) 
    3125        self.initial_lineno = self.state_machine.abs_line_number() 
    3126        return [match.string], next_state, [] 
    3127 
    3128    def quoted(self, match, context, next_state): 
    3129        """Match consistent quotes on subsequent lines.""" 
    3130        context.append(match.string) 
    3131        return context, next_state, [] 
    3132 
    3133    def text(self, match, context, next_state): 
    3134        if context: 
    3135            self.messages.append( 
    3136                self.reporter.error('Inconsistent literal block quoting.', 
    3137                                    line=self.state_machine.abs_line_number())) 
    3138            self.state_machine.previous_line() 
    3139        raise EOFError 
    3140 
    3141 
# `QuotedLiteralBlock` is deliberately not listed: it is special-purpose and
# is passed explicitly to the nested parse in `Text.quoted_literal_block`.
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""