1# $Id$ 
    2# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde 
    3# Maintainer: docutils-develop@lists.sourceforge.net 
    4# Copyright: This module has been placed in the public domain. 
    5 
    6""" 
    7Transforms needed by most or all documents: 
    8 
    9- `Decorations`: Generate a document's header & footer. 
    10- `ExposeInternals`: Expose internal attributes. 
    11- `Messages`: Placement of system messages generated after parsing. 
    12- `FilterMessages`: Remove system messages below verbosity threshold. 
    13- `TestMessages`: Like `Messages`, used on test runs. 
    14- `StripComments`: Remove comment elements from the document tree. 
    15- `StripClassesAndElements`: Remove elements with classes 
    16  in `self.document.settings.strip_elements_with_classes` 
    17  and class values in `self.document.settings.strip_classes`. 
    18- `SmartQuotes`: Replace ASCII quotation marks with typographic form. 
- `Validate`: Validate the document tree, report violations as warnings.
    20""" 
    21 
    22__docformat__ = 'reStructuredText' 
    23 
    24import re 
    25import time 
    26from docutils import nodes, utils 
    27from docutils.transforms import Transform 
    28from docutils.utils import smartquotes 
    29 
    30 
    class Decorations(Transform):

        """
        Populate a document's decoration element (header, footer).

        The footer content depends on the settings `source_link`,
        `source_url`, `datestamp`, and `generator`.
        No header is generated by default.
        """

        default_priority = 820

        def apply(self):
            """Add generated header and footer nodes to the decoration.

            The decoration element is only requested from the document
            if there is something to put into it.
            """
            header_nodes = self.generate_header()
            if header_nodes:
                decoration = self.document.get_decoration()
                header = decoration.get_header()
                header.extend(header_nodes)
            footer_nodes = self.generate_footer()
            if footer_nodes:
                decoration = self.document.get_decoration()
                footer = decoration.get_footer()
                footer.extend(footer_nodes)

        def generate_header(self):
            """Return a list of header nodes or None (the default)."""
            return None

        def generate_footer(self):
            """Return a list with one footer paragraph or None.

            The paragraph may contain (in this order) a link to the document
            source, a datestamp, and a "Generated by Docutils" notice.
            Return None if there is nothing to show.
            """
            # @@@ Text is hard-coded for now.
            # Should be made dynamic (language-dependent).
            # @@@ Use timestamp from the `SOURCE_DATE_EPOCH`_ environment
            # variable for the datestamp?
            # See https://sourceforge.net/p/docutils/patches/132/
            # and https://reproducible-builds.org/specs/source-date-epoch/
            settings = self.document.settings
            if not (settings.generator or settings.datestamp
                    or settings.source_link or settings.source_url):
                return None
            text = []
            if (settings.source_link and settings._source
                or settings.source_url):
                # An explicit URL takes precedence over the computed path.
                if settings.source_url:
                    source = settings.source_url
                else:
                    source = utils.relative_path(settings._destination,
                                                 settings._source)
                text.extend([
                    nodes.reference('', 'View document source',
                                    refuri=source),
                    nodes.Text('.\n')])
            if settings.datestamp:
                datestamp = time.strftime(settings.datestamp, time.gmtime())
                text.append(nodes.Text('Generated on: ' + datestamp + '.\n'))
            if settings.generator:
                text.extend([
                    nodes.Text('Generated by '),
                    nodes.reference('', 'Docutils',
                                    refuri='https://docutils.sourceforge.io/'),
                    nodes.Text(' from '),
                    nodes.reference('', 'reStructuredText',
                                    refuri='https://docutils.sourceforge.io/'
                                    'rst.html'),
                    nodes.Text(' source.\n')])
            if not text:
                # `source_link` is the only active option but the source
                # path is unset: do not create a footer with an empty
                # paragraph.
                return None
            return [nodes.paragraph('', '', *text)]
    92 
    93 
    class ExposeInternals(Transform):

        """
        Make internal node attributes visible in the document tree.

        Only active if the ``expose_internals`` setting is set.
        """

        default_priority = 840

        def not_Text(self, node):
            """Return True unless `node` is a `nodes.Text` instance."""
            if isinstance(node, nodes.Text):
                return False
            return True

        def apply(self):
            """Store each listed attribute of a node as ``internal:<name>``."""
            exposed = self.document.settings.expose_internals
            if not exposed:
                return
            for element in self.document.findall(self.not_Text):
                for name in exposed:
                    value = getattr(element, name, None)
                    if value is None:
                        continue  # attribute missing or unset
                    element['internal:' + name] = value
    112 
    113 
    class Messages(Transform):

        """Handle "loose" messages.

        System messages from parsing or transforms that have no parent
        (i.e. are not attached to the document tree) are collected in a
        dedicated section appended to the document.
        """

        default_priority = 860

        def apply(self):
            """Append a "system-messages" section if there are loose messages."""
            candidates = [*self.document.parse_messages,
                          *self.document.transform_messages]
            loose_messages = []
            for message in candidates:
                if not message.parent:
                    loose_messages.append(message)
            if not loose_messages:
                return
            section = nodes.section(classes=['system-messages'])
            # @@@ get this from the language module?
            section += nodes.title('', 'Docutils System Messages')
            section += loose_messages
            # empty the list of transform messages in place
            del self.document.transform_messages[:]
            self.document += section
    135 
    136 
    class FilterMessages(Transform):

        """
        Remove system messages below verbosity threshold.

        Also convert <problematic> nodes referencing removed messages
        to <Text> nodes and remove "System Messages" section if empty.
        """

        default_priority = 870

        def apply(self):
            """Filter system messages and clean up what referenced them.

            All three passes iterate over a tuple because removing or
            replacing the current node would corrupt the iterator
            returned by `findall()`.
            """
            removed_ids = set()  # IDs of removed system messages
            report_level = self.document.reporter.report_level
            for node in tuple(self.document.findall(nodes.system_message)):
                if node['level'] < report_level:
                    node.parent.remove(node)
                    for _id in node['ids']:
                        # remove ID registration
                        self.document.ids.pop(_id, None)
                        removed_ids.add(_id)
            for node in tuple(self.document.findall(nodes.problematic)):
                if 'refid' in node and node['refid'] in removed_ids:
                    node.parent.replace(node, nodes.Text(node.astext()))
            for node in tuple(self.document.findall(nodes.section)):
                # a "system-messages" section with just one child left
                # (cf. the docstring: the section is empty) is dropped
                if "system-messages" in node['classes'] and len(node) == 1:
                    node.parent.remove(node)
    162 
    163 
    class TestMessages(Transform):

        """
        Attach all parent-less post-parse system messages to the document.

        The messages are appended at the end; used for testing purposes.
        """

        # tell pytest not to collect this class as a test case
        __test__ = False

        default_priority = 880

        def apply(self):
            """Append every transform message without parent to the document."""
            for message in self.document.transform_messages:
                if message.parent:
                    continue  # already part of a tree
                self.document += message
    181 
    182 
    class StripComments(Transform):

        """
        Drop all comment elements from the document tree.

        Does nothing unless the ``strip_comments`` setting is enabled.
        """

        default_priority = 740

        def apply(self):
            """Remove every `nodes.comment` instance from its parent."""
            if not self.document.settings.strip_comments:
                return
            # Collect first: the tree must not change while it is traversed.
            comments = tuple(self.document.findall(nodes.comment))
            for comment in comments:
                comment.parent.remove(comment)
    196 
    197 
    class StripClassesAndElements(Transform):

        """
        Strip elements and class values according to the settings.

        Elements with a class listed in
        `self.document.settings.strip_elements_with_classes` are removed from
        the document tree; "classes" attribute values listed in
        `self.document.settings.strip_classes` are removed from all elements.
        """

        default_priority = 420

        def apply(self):
            """Remove the configured elements, then the configured classes."""
            settings = self.document.settings
            if settings.strip_elements_with_classes:
                self.strip_elements = set(settings.strip_elements_with_classes)
                # Collect the matches in a tuple first: removing a node
                # during the traversal would corrupt the iterator.
                doomed = tuple(self.document.findall(self.check_classes))
                for element in doomed:
                    element.parent.remove(element)

            strip_classes = settings.strip_classes
            if not strip_classes:
                return
            for element in self.document.findall(nodes.Element):
                classes = element['classes']
                for class_value in strip_classes:
                    if class_value in classes:
                        classes.remove(class_value)

        def check_classes(self, node):
            """Return True if `node` is an Element with a class to be stripped.

            Compares the node's "classes" values with `self.strip_elements`.
            """
            if not isinstance(node, nodes.Element):
                return False
            return any(class_value in self.strip_elements
                       for class_value in node['classes'])
    234 
    235 
    class SmartQuotes(Transform):

        """
        Convert ASCII quotation marks to their typographic form.

        Sequences of dashes are replaced with em-dash/en-dash characters, too.
        """

        default_priority = 855

        nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
        """Block-level node classes that "smartquotes" is not applied to."""

        literal_nodes = (nodes.FixedTextElement, nodes.Special,
                         nodes.image, nodes.literal, nodes.math,
                         nodes.raw, nodes.problematic)
        """Inline node classes that "smartquotes" is not applied to."""

        smartquotes_action = 'qDe'
        """Selection of smartquote transformations.

        With the default 'qDe', normal quote characters (", '),
        em- and en-dashes (---, --) and ellipses (...) are educated.
        """

        def __init__(self, document, startnode):
            super().__init__(document, startnode=startnode)
            # languages for which a warning has been issued already
            self.unsupported_languages = set()

        def get_tokens(self, txtnodes):
            """Yield a ``(texttype, nodetext)`` tuple for each "Text" node.

            `texttype` is 'literal' if the parent or grandparent of the
            node is one of `self.literal_nodes`, else 'plain'.
            Interface to ``smartquotes.educate_tokens()``.
            """
            for txtnode in txtnodes:
                parent = txtnode.parent
                if (isinstance(parent, self.literal_nodes)
                    or isinstance(parent.parent, self.literal_nodes)):
                    yield 'literal', str(txtnode)
                    continue
                # SmartQuotes uses backslash escapes instead of null-escapes:
                # put a backslash in front of escaped "active" characters.
                yield 'plain', re.sub('(?<=\x00)([-\\\'".`])', r'\\\1',
                                      str(txtnode))

        def apply(self):
            """Educate the quotes in all "text blocks" of the document.

            Does nothing unless the `smart_quotes` setting is true.
            """
            settings = self.document.settings
            smart_quotes = settings.setdefault('smart_quotes', False)
            if not smart_quotes:
                return
            # Values without a `startswith()` method never select the
            # alternative quotes.
            try:
                alternative = smart_quotes.startswith('alt')
            except AttributeError:
                alternative = False

            document_language = settings.language_code
            lc_smartquotes = settings.smartquotes_locales
            if lc_smartquotes:
                smartquotes.smartchars.quotes.update(dict(lc_smartquotes))

            # Each block of text (TextElement node) is processed as a unit
            # so that the context around inline nodes is kept:
            for block in self.document.findall(nodes.TextElement):
                # Preformatted text blocks and special elements are skipped,
                # as are nested TextElements (they are not "block-level").
                if (isinstance(block, self.nodes_to_skip)
                    or isinstance(block.parent, nodes.TextElement)):
                    continue

                # the text nodes that make up the "text block":
                txtnodes = [txtnode for txtnode in block.findall(nodes.Text)
                            if not isinstance(txtnode.parent,
                                              nodes.option_string)]

                # the language determines the typographical quotes to use
                lang = block.get_language_code(document_language)
                # toggle the "-x-altquot" subtag if `smart-quotes` starts
                # with "alt":
                if alternative:
                    if '-x-altquot' in lang:
                        lang = lang.replace('-x-altquot', '')
                    else:
                        lang += '-x-altquot'
                # find the first normalized tag with defined quotes
                # (drops unsupported subtags):
                supported = next(
                    (tag for tag in utils.normalize_language_tag(lang)
                     if tag in smartquotes.smartchars.quotes),
                    None)
                if supported is None:
                    # language not supported -- keep ASCII quotes
                    if lang not in self.unsupported_languages:
                        self.document.reporter.warning(
                            'No smart quotes defined for language "%s".' % lang,
                            base_node=block)
                    self.unsupported_languages.add(lang)
                    lang = ''
                else:
                    lang = supported

                # Iterator educating quotes in plain text
                # (see "utils/smartquotes.py" for the attribute setting):
                teacher = smartquotes.educate_tokens(
                    self.get_tokens(txtnodes),
                    attr=self.smartquotes_action, language=lang)

                for txtnode, newtext in zip(txtnodes, teacher):
                    txtnode.parent.replace(txtnode, nodes.Text(newtext))

            self.unsupported_languages.clear()
    339 
    340 
    341class Validate(Transform): 
    342 
    343    """ 
    344    Validate the document tree, report violations as warning. 
    345    """ 
    346 
    347    default_priority = 835  # between misc.Transitions and  universal.Messages 
    348 
    349    def apply(self): 
    350        if not getattr(self.document.settings, 'validate', False): 
    351            return 
    352        for node in self.document.findall(): 
    353            try: 
    354                node.validate(recursive=False) 
    355            except nodes.ValidationError as e: 
    356                self.document.reporter.warning( 
    357                    str(e), base_node=e.problematic_element or node) 
    358        # TODO: append a link to the Document Tree documentation? 
    359        # nodes.paragraph('', 'See ', 
    360        # nodes.reference('', 'doctree.html#document', 
    361        #                 refuri='https://docutils.sourceforge.io/' 
    362        #                 'docs/ref/doctree.html#document'),