1# Smarty extension for Python-Markdown 
    2# ==================================== 
    3 
    4# Adds conversion of ASCII dashes, quotes and ellipses to their HTML 
    5# entity equivalents. 
    6 
    7# See https://Python-Markdown.github.io/extensions/smarty 
    8# for documentation. 
    9 
    10# Author: 2013, Dmitry Shachnev <mitya57@gmail.com> 
    11 
    12# All changes Copyright 2013-2014 The Python Markdown Project 
    13 
    14# License: [BSD](https://opensource.org/licenses/bsd-license.php) 
    15 
    16# SmartyPants license: 
    17 
    18#    Copyright (c) 2003 John Gruber <https://daringfireball.net/> 
    19#    All rights reserved. 
    20 
    21#    Redistribution and use in source and binary forms, with or without 
    22#    modification, are permitted provided that the following conditions are 
    23#    met: 
    24 
    25#    *  Redistributions of source code must retain the above copyright 
    26#       notice, this list of conditions and the following disclaimer. 
    27 
    28#    *  Redistributions in binary form must reproduce the above copyright 
    29#       notice, this list of conditions and the following disclaimer in 
    30#       the documentation and/or other materials provided with the 
    31#       distribution. 
    32 
    33#    *  Neither the name "SmartyPants" nor the names of its contributors 
    34#       may be used to endorse or promote products derived from this 
    35#       software without specific prior written permission. 
    36 
    37#    This software is provided by the copyright holders and contributors "as 
    38#    is" and any express or implied warranties, including, but not limited 
    39#    to, the implied warranties of merchantability and fitness for a 
    40#    particular purpose are disclaimed. In no event shall the copyright 
    41#    owner or contributors be liable for any direct, indirect, incidental, 
    42#    special, exemplary, or consequential damages (including, but not 
    43#    limited to, procurement of substitute goods or services; loss of use, 
    44#    data, or profits; or business interruption) however caused and on any 
    45#    theory of liability, whether in contract, strict liability, or tort 
    46#    (including negligence or otherwise) arising in any way out of the use 
    47#    of this software, even if advised of the possibility of such damage. 
    48 
    49 
    50# `smartypants.py` license: 
    51 
    52#    `smartypants.py` is a derivative work of SmartyPants. 
    53#    Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/> 
    54 
    55#    Redistribution and use in source and binary forms, with or without 
    56#    modification, are permitted provided that the following conditions are 
    57#    met: 
    58 
    59#    *  Redistributions of source code must retain the above copyright 
    60#       notice, this list of conditions and the following disclaimer. 
    61 
    62#    *  Redistributions in binary form must reproduce the above copyright 
    63#       notice, this list of conditions and the following disclaimer in 
    64#       the documentation and/or other materials provided with the 
    65#       distribution. 
    66 
    67#    This software is provided by the copyright holders and contributors "as 
    68#    is" and any express or implied warranties, including, but not limited 
    69#    to, the implied warranties of merchantability and fitness for a 
    70#    particular purpose are disclaimed. In no event shall the copyright 
    71#    owner or contributors be liable for any direct, indirect, incidental, 
    72#    special, exemplary, or consequential damages (including, but not 
    73#    limited to, procurement of substitute goods or services; loss of use, 
    74#    data, or profits; or business interruption) however caused and on any 
    75#    theory of liability, whether in contract, strict liability, or tort 
    76#    (including negligence or otherwise) arising in any way out of the use 
    77#    of this software, even if advised of the possibility of such damage. 
    78 
    79""" 
    80Convert ASCII dashes, quotes and ellipses to their HTML entity equivalents. 
    81 
    82See the [documentation](https://Python-Markdown.github.io/extensions/smarty) 
    83for details. 
    84""" 
    85 
    86from __future__ import annotations 
    87 
    88from . import Extension 
    89from ..inlinepatterns import HtmlInlineProcessor, HTML_RE 
    90from ..treeprocessors import InlineProcessor 
    91from ..util import Registry 
    92from typing import TYPE_CHECKING, Sequence 
    93 
    94if TYPE_CHECKING:  # pragma: no cover 
    95    from markdown import Markdown 
    96    from .. import inlinepatterns 
    97    import re 
    98    import xml.etree.ElementTree as etree 
    99 
    100# Constants for quote education. 
    101punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" 
    102endOfWordClass = r"[\s.,;:!?)]" 
    103closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]" 
    104 
    105openingQuotesBase = ( 
    106    r'(\s'               # a  whitespace char 
    107    r'| '           # or a non-breaking space entity 
    108    r'|--'               # or dashes 
    109    r'|–|—'              # or Unicode 
    110    r'|&[mn]dash;'       # or named dash entities 
    111    r'|–|—'  # or decimal entities 
    112    r')' 
    113) 
    114 
    115substitutions = { 
    116    'mdash': '—', 
    117    'ndash': '–', 
    118    'ellipsis': '…', 
    119    'left-angle-quote': '«', 
    120    'right-angle-quote': '»', 
    121    'left-single-quote': '‘', 
    122    'right-single-quote': '’', 
    123    'left-double-quote': '“', 
    124    'right-double-quote': '”', 
    125} 
    126 
    127 
    128# Special case if the very first character is a quote 
    129# followed by punctuation at a non-word-break. Close the quotes by brute force: 
    130singleQuoteStartRe = r"^'(?=%s\B)" % punctClass 
    131doubleQuoteStartRe = r'^"(?=%s\B)' % punctClass 
    132 
    133# Special case for double sets of quotes, e.g.: 
    134#   <p>He said, "'Quoted' words in a larger quote."</p> 
    135doubleQuoteSetsRe = r""""'(?=\w)""" 
    136singleQuoteSetsRe = r"""'"(?=\w)""" 
    137doubleQuoteSetsRe2 = r'(?<=%s)\'"' % closeClass 
    138singleQuoteSetsRe2 = r"(?<=%s)\"'" % closeClass 
    139 
    140# Special case for decade abbreviations (the '80s): 
    141decadeAbbrRe = r"(?<!\w)'(?=\d{2}s)" 
    142 
    143# Get most opening double quotes: 
    144openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase 
    145 
    146# Double closing quotes: 
    147closingDoubleQuotesRegex = r'"(?=\s)' 
    148closingDoubleQuotesRegex2 = r'(?<=%s)"' % closeClass 
    149 
    150# Get most opening single quotes: 
    151openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase 
    152 
    153# Single closing quotes: 
    154closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass 
    155closingSingleQuotesRegex2 = r"'(\s|s\b)" 
    156 
    157# All remaining quotes should be opening ones 
    158remainingSingleQuotesRegex = r"'" 
    159remainingDoubleQuotesRegex = r'"' 
    160 
    161HTML_STRICT_RE = HTML_RE + r'(?!\>)' 
    162 
    163 
    164class SubstituteTextPattern(HtmlInlineProcessor): 
    165    def __init__(self, pattern: str, replace: Sequence[int | str | etree.Element], md: Markdown): 
    166        """ Replaces matches with some text. """ 
    167        HtmlInlineProcessor.__init__(self, pattern) 
    168        self.replace = replace 
    169        self.md = md 
    170 
    171    def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: 
    172        result = '' 
    173        for part in self.replace: 
    174            if isinstance(part, int): 
    175                result += m.group(part) 
    176            else: 
    177                result += self.md.htmlStash.store(part) 
    178        return result, m.start(0), m.end(0) 
    179 
    180 
    181class SmartyExtension(Extension): 
    182    """ Add Smarty to Markdown. """ 
    183    def __init__(self, **kwargs): 
    184        self.config = { 
    185            'smart_quotes': [True, 'Educate quotes'], 
    186            'smart_angled_quotes': [False, 'Educate angled quotes'], 
    187            'smart_dashes': [True, 'Educate dashes'], 
    188            'smart_ellipses': [True, 'Educate ellipses'], 
    189            'substitutions': [{}, 'Overwrite default substitutions'], 
    190        } 
    191        """ Default configuration options. """ 
    192        super().__init__(**kwargs) 
    193        self.substitutions: dict[str, str] = dict(substitutions) 
    194        self.substitutions.update(self.getConfig('substitutions', default={})) 
    195 
    196    def _addPatterns( 
    197        self, 
    198        md: Markdown, 
    199        patterns: Sequence[tuple[str, Sequence[int | str | etree.Element]]], 
    200        serie: str, 
    201        priority: int, 
    202    ): 
    203        for ind, pattern in enumerate(patterns): 
    204            pattern += (md,) 
    205            pattern = SubstituteTextPattern(*pattern) 
    206            name = 'smarty-%s-%d' % (serie, ind) 
    207            self.inlinePatterns.register(pattern, name, priority-ind) 
    208 
    209    def educateDashes(self, md: Markdown) -> None: 
    210        emDashesPattern = SubstituteTextPattern( 
    211            r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md 
    212        ) 
    213        enDashesPattern = SubstituteTextPattern( 
    214            r'(?<!-)--(?!-)', (self.substitutions['ndash'],), md 
    215        ) 
    216        self.inlinePatterns.register(emDashesPattern, 'smarty-em-dashes', 50) 
    217        self.inlinePatterns.register(enDashesPattern, 'smarty-en-dashes', 45) 
    218 
    219    def educateEllipses(self, md: Markdown) -> None: 
    220        ellipsesPattern = SubstituteTextPattern( 
    221            r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md 
    222        ) 
    223        self.inlinePatterns.register(ellipsesPattern, 'smarty-ellipses', 10) 
    224 
    225    def educateAngledQuotes(self, md: Markdown) -> None: 
    226        leftAngledQuotePattern = SubstituteTextPattern( 
    227            r'\<\<', (self.substitutions['left-angle-quote'],), md 
    228        ) 
    229        rightAngledQuotePattern = SubstituteTextPattern( 
    230            r'\>\>', (self.substitutions['right-angle-quote'],), md 
    231        ) 
    232        self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40) 
    233        self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35) 
    234 
    235    def educateQuotes(self, md: Markdown) -> None: 
    236        lsquo = self.substitutions['left-single-quote'] 
    237        rsquo = self.substitutions['right-single-quote'] 
    238        ldquo = self.substitutions['left-double-quote'] 
    239        rdquo = self.substitutions['right-double-quote'] 
    240        patterns = ( 
    241            (singleQuoteStartRe, (rsquo,)), 
    242            (doubleQuoteStartRe, (rdquo,)), 
    243            (doubleQuoteSetsRe, (ldquo + lsquo,)), 
    244            (singleQuoteSetsRe, (lsquo + ldquo,)), 
    245            (doubleQuoteSetsRe2, (rsquo + rdquo,)), 
    246            (singleQuoteSetsRe2, (rdquo + rsquo,)), 
    247            (decadeAbbrRe, (rsquo,)), 
    248            (openingSingleQuotesRegex, (1, lsquo)), 
    249            (closingSingleQuotesRegex, (rsquo,)), 
    250            (closingSingleQuotesRegex2, (rsquo, 1)), 
    251            (remainingSingleQuotesRegex, (lsquo,)), 
    252            (openingDoubleQuotesRegex, (1, ldquo)), 
    253            (closingDoubleQuotesRegex, (rdquo,)), 
    254            (closingDoubleQuotesRegex2, (rdquo,)), 
    255            (remainingDoubleQuotesRegex, (ldquo,)) 
    256        ) 
    257        self._addPatterns(md, patterns, 'quotes', 30) 
    258 
    259    def extendMarkdown(self, md): 
    260        configs = self.getConfigs() 
    261        self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry() 
    262        if configs['smart_ellipses']: 
    263            self.educateEllipses(md) 
    264        if configs['smart_quotes']: 
    265            self.educateQuotes(md) 
    266        if configs['smart_angled_quotes']: 
    267            self.educateAngledQuotes(md) 
    268            # Override `HTML_RE` from `inlinepatterns.py` so that it does not 
    269            # process tags with duplicate closing quotes. 
    270            md.inlinePatterns.register(HtmlInlineProcessor(HTML_STRICT_RE, md), 'html', 90) 
    271        if configs['smart_dashes']: 
    272            self.educateDashes(md) 
    273        inlineProcessor = InlineProcessor(md) 
    274        inlineProcessor.inlinePatterns = self.inlinePatterns 
    275        md.treeprocessors.register(inlineProcessor, 'smarty', 6) 
    276        md.ESCAPED_CHARS.extend(['"', "'"]) 
    277 
    278 
    279def makeExtension(**kwargs):  # pragma: no cover 
    280    return SmartyExtension(**kwargs)