1# Smarty extension for Python-Markdown
2# ====================================
3
4# Adds conversion of ASCII dashes, quotes and ellipses to their HTML
5# entity equivalents.
6
7# See https://Python-Markdown.github.io/extensions/smarty
8# for documentation.
9
10# Author: 2013, Dmitry Shachnev <mitya57@gmail.com>
11
12# All changes Copyright 2013-2014 The Python Markdown Project
13
14# License: [BSD](https://opensource.org/licenses/bsd-license.php)
15
16# SmartyPants license:
17
18# Copyright (c) 2003 John Gruber <https://daringfireball.net/>
19# All rights reserved.
20
21# Redistribution and use in source and binary forms, with or without
22# modification, are permitted provided that the following conditions are
23# met:
24
25# * Redistributions of source code must retain the above copyright
26# notice, this list of conditions and the following disclaimer.
27
28# * Redistributions in binary form must reproduce the above copyright
29# notice, this list of conditions and the following disclaimer in
30# the documentation and/or other materials provided with the
31# distribution.
32
33# * Neither the name "SmartyPants" nor the names of its contributors
34# may be used to endorse or promote products derived from this
35# software without specific prior written permission.
36
37# This software is provided by the copyright holders and contributors "as
38# is" and any express or implied warranties, including, but not limited
39# to, the implied warranties of merchantability and fitness for a
40# particular purpose are disclaimed. In no event shall the copyright
41# owner or contributors be liable for any direct, indirect, incidental,
42# special, exemplary, or consequential damages (including, but not
43# limited to, procurement of substitute goods or services; loss of use,
44# data, or profits; or business interruption) however caused and on any
45# theory of liability, whether in contract, strict liability, or tort
46# (including negligence or otherwise) arising in any way out of the use
47# of this software, even if advised of the possibility of such damage.
48
49
50# `smartypants.py` license:
51
52# `smartypants.py` is a derivative work of SmartyPants.
53# Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/>
54
55# Redistribution and use in source and binary forms, with or without
56# modification, are permitted provided that the following conditions are
57# met:
58
59# * Redistributions of source code must retain the above copyright
60# notice, this list of conditions and the following disclaimer.
61
62# * Redistributions in binary form must reproduce the above copyright
63# notice, this list of conditions and the following disclaimer in
64# the documentation and/or other materials provided with the
65# distribution.
66
67# This software is provided by the copyright holders and contributors "as
68# is" and any express or implied warranties, including, but not limited
69# to, the implied warranties of merchantability and fitness for a
70# particular purpose are disclaimed. In no event shall the copyright
71# owner or contributors be liable for any direct, indirect, incidental,
72# special, exemplary, or consequential damages (including, but not
73# limited to, procurement of substitute goods or services; loss of use,
74# data, or profits; or business interruption) however caused and on any
75# theory of liability, whether in contract, strict liability, or tort
76# (including negligence or otherwise) arising in any way out of the use
77# of this software, even if advised of the possibility of such damage.
78
79"""
80Convert ASCII dashes, quotes and ellipses to their HTML entity equivalents.
81
82See the [documentation](https://Python-Markdown.github.io/extensions/smarty)
83for details.
84"""
85
86from __future__ import annotations
87
88from . import Extension
89from ..inlinepatterns import HtmlInlineProcessor, HTML_RE
90from ..treeprocessors import InlineProcessor
91from ..util import Registry
92from typing import TYPE_CHECKING, Sequence
93
94if TYPE_CHECKING: # pragma: no cover
95 from markdown import Markdown
96 from .. import inlinepatterns
97 import re
98 import xml.etree.ElementTree as etree
99
# Constants for quote education.
#
# All values below are regular-expression *source strings* (not compiled
# patterns); several are interpolated into the larger expressions further down.

# A single ASCII punctuation character.
punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
# A character that may terminate a word: whitespace or closing punctuation.
endOfWordClass = r"[\s.,;:!?)]"
# Any character that may directly precede a *closing* quote, i.e. anything
# except blanks, opening brackets, a hyphen and the U+0002/U+0003 control
# characters.
# NOTE(review): U+0002/U+0003 are presumably Python-Markdown's placeholder
# delimiters - confirm against `markdown.util`.
closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]"

# Capturing group of everything that may directly precede an *opening* quote.
# The entity alternatives are spelled out as entities on purpose: the literal
# characters are already covered by `\s` and by the Unicode dash alternative,
# so writing them as characters would leave the entity forms unmatched.
openingQuotesBase = (
    r'(\s'               # a whitespace char
    r'|&nbsp;'           # or a non-breaking space entity
    r'|--'               # or dashes
    r'|–|—'              # or Unicode
    r'|&[mn]dash;'       # or named dash entities
    r'|&#8211;|&#8212;'  # or decimal entities
    r')'
)

# Default replacement text for each substitution, as HTML entities. These are
# the defaults that `SmartyExtension` copies and then overlays with the
# `substitutions` configuration option.
substitutions = {
    'mdash': '&mdash;',
    'ndash': '&ndash;',
    'ellipsis': '&hellip;',
    'left-angle-quote': '&laquo;',
    'right-angle-quote': '&raquo;',
    'left-single-quote': '&lsquo;',
    'right-single-quote': '&rsquo;',
    'left-double-quote': '&ldquo;',
    'right-double-quote': '&rdquo;',
}


# Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force:
singleQuoteStartRe = r"^'(?=%s\B)" % punctClass
doubleQuoteStartRe = r'^"(?=%s\B)' % punctClass

# Special case for double sets of quotes, e.g.:
#   <p>He said, "'Quoted' words in a larger quote."</p>
doubleQuoteSetsRe = r""""'(?=\w)"""
singleQuoteSetsRe = r"""'"(?=\w)"""
doubleQuoteSetsRe2 = r'(?<=%s)\'"' % closeClass
singleQuoteSetsRe2 = r"(?<=%s)\"'" % closeClass

# Special case for decade abbreviations (the '80s):
decadeAbbrRe = r"(?<!\w)'(?=\d{2}s)"

# Get most opening double quotes (group 1 is the preceding context, which the
# replacement has to re-emit):
openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase

# Double closing quotes:
closingDoubleQuotesRegex = r'"(?=\s)'
closingDoubleQuotesRegex2 = r'(?<=%s)"' % closeClass

# Get most opening single quotes (group 1 is the preceding context):
openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase

# Single closing quotes (group 1 of the second form is the trailing context):
closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass
closingSingleQuotesRegex2 = r"'(\s|s\b)"

# All remaining quotes should be opening ones
remainingSingleQuotesRegex = r"'"
remainingDoubleQuotesRegex = r'"'
160
# `HTML_RE` with a trailing negative lookahead: a match must not be
# immediately followed by another `>`.
HTML_STRICT_RE = rf'{HTML_RE}(?!\>)'
162
163
class SubstituteTextPattern(HtmlInlineProcessor):
    """ Inline processor that swaps every match for a fixed sequence of parts. """

    def __init__(self, pattern: str, replace: Sequence[int | str | etree.Element], md: Markdown):
        """
        Replaces matches with some text.

        Arguments:
            pattern: Regular expression source handed to the base class.
            replace: Parts of the replacement, in output order. An `int` part
                is a group number of the match whose text is copied through;
                any other part is stored via `md.htmlStash.store`.
            md: The `Markdown` instance whose `htmlStash` is used.
        """
        super().__init__(pattern)
        self.md = md
        self.replace = replace

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
        """
        Build the replacement for match `m`.

        `data` is accepted but not used.

        Returns:
            A tuple of the assembled replacement string and the start and end
            offsets of the whole match.
        """
        pieces = [
            # Group references pass the matched text through unchanged; every
            # other part contributes whatever `htmlStash.store` returns for it.
            m.group(part) if isinstance(part, int) else self.md.htmlStash.store(part)
            for part in self.replace
        ]
        return ''.join(pieces), m.start(0), m.end(0)
179
180
class SmartyExtension(Extension):
    """ Add Smarty to Markdown. """

    def __init__(self, **kwargs):
        """ Set up the default options, then apply any passed as keyword arguments. """
        self.config = {
            'smart_quotes': [True, 'Educate quotes'],
            'smart_angled_quotes': [False, 'Educate angled quotes'],
            'smart_dashes': [True, 'Educate dashes'],
            'smart_ellipses': [True, 'Educate ellipses'],
            'substitutions': [{}, 'Overwrite default substitutions'],
        }
        """ Default configuration options. """
        super().__init__(**kwargs)
        # Start from a private copy of the module-level defaults, then layer
        # the user-supplied `substitutions` option on top.
        self.substitutions: dict[str, str] = {**substitutions}
        overrides = self.getConfig('substitutions', default={})
        self.substitutions.update(overrides)

    def _addPatterns(
        self,
        md: Markdown,
        patterns: Sequence[tuple[str, Sequence[int | str | etree.Element]]],
        serie: str,
        priority: int,
    ):
        """
        Register a series of `(regex, replacement parts)` pairs.

        The pair at index `i` is registered under the name
        `smarty-<serie>-<i>` with priority `priority - i`.
        """
        for offset, (regex, parts) in enumerate(patterns):
            processor = SubstituteTextPattern(regex, parts, md)
            self.inlinePatterns.register(processor, f'smarty-{serie}-{offset}', priority - offset)

    def educateDashes(self, md: Markdown) -> None:
        """ Register the em-dash (`---`) and en-dash (`--`) patterns. """
        # (regex, substitution key, registry name, priority)
        dashes = (
            (r'(?<!-)---(?!-)', 'mdash', 'smarty-em-dashes', 50),
            (r'(?<!-)--(?!-)', 'ndash', 'smarty-en-dashes', 45),
        )
        for regex, key, name, priority in dashes:
            processor = SubstituteTextPattern(regex, (self.substitutions[key],), md)
            self.inlinePatterns.register(processor, name, priority)

    def educateEllipses(self, md: Markdown) -> None:
        """ Register the pattern for exactly three consecutive dots. """
        replacement = (self.substitutions['ellipsis'],)
        self.inlinePatterns.register(
            SubstituteTextPattern(r'(?<!\.)\.{3}(?!\.)', replacement, md),
            'smarty-ellipses',
            10,
        )

    def educateAngledQuotes(self, md: Markdown) -> None:
        """ Register the `<<` and `>>` patterns. """
        # (regex, substitution key, registry name, priority)
        angled = (
            (r'\<\<', 'left-angle-quote', 'smarty-left-angle-quotes', 40),
            (r'\>\>', 'right-angle-quote', 'smarty-right-angle-quotes', 35),
        )
        for regex, key, name, priority in angled:
            processor = SubstituteTextPattern(regex, (self.substitutions[key],), md)
            self.inlinePatterns.register(processor, name, priority)

    def educateQuotes(self, md: Markdown) -> None:
        """ Register the single- and double-quote patterns as the `quotes` series. """
        subs = self.substitutions
        lsquo, rsquo = subs['left-single-quote'], subs['right-single-quote']
        ldquo, rdquo = subs['left-double-quote'], subs['right-double-quote']
        # The position in this tuple determines each pattern's priority (see
        # `_addPatterns`), so the order is significant. An integer in the
        # replacement re-emits that group of the match.
        quote_rules = (
            (singleQuoteStartRe, (rsquo,)),
            (doubleQuoteStartRe, (rdquo,)),
            (doubleQuoteSetsRe, (ldquo + lsquo,)),
            (singleQuoteSetsRe, (lsquo + ldquo,)),
            (doubleQuoteSetsRe2, (rsquo + rdquo,)),
            (singleQuoteSetsRe2, (rdquo + rsquo,)),
            (decadeAbbrRe, (rsquo,)),
            (openingSingleQuotesRegex, (1, lsquo)),
            (closingSingleQuotesRegex, (rsquo,)),
            (closingSingleQuotesRegex2, (rsquo, 1)),
            (remainingSingleQuotesRegex, (lsquo,)),
            (openingDoubleQuotesRegex, (1, ldquo)),
            (closingDoubleQuotesRegex, (rdquo,)),
            (closingDoubleQuotesRegex2, (rdquo,)),
            (remainingDoubleQuotesRegex, (ldquo,)),
        )
        self._addPatterns(md, quote_rules, 'quotes', 30)

    def extendMarkdown(self, md):
        """
        Build this extension's pattern registry and attach it to `md`.

        The enabled pattern groups go into a fresh `Registry`, which is given
        to a dedicated `InlineProcessor` registered as the `smarty` tree
        processor.
        """
        options = self.getConfigs()
        self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry()
        if options['smart_ellipses']:
            self.educateEllipses(md)
        if options['smart_quotes']:
            self.educateQuotes(md)
        if options['smart_angled_quotes']:
            self.educateAngledQuotes(md)
            # Override `HTML_RE` from `inlinepatterns.py` so that it does not
            # process tags with duplicate closing quotes.
            strict_html = HtmlInlineProcessor(HTML_STRICT_RE, md)
            md.inlinePatterns.register(strict_html, 'html', 90)
        if options['smart_dashes']:
            self.educateDashes(md)
        smarty_processor = InlineProcessor(md)
        # Run only the Smarty patterns, not the default inline patterns.
        smarty_processor.inlinePatterns = self.inlinePatterns
        md.treeprocessors.register(smarty_processor, 'smarty', 6)
        # NOTE(review): presumably so that backslash-escaped quotes are left
        # alone - confirm against the core escape handling.
        md.ESCAPED_CHARS.extend(['"', "'"])
277
278
def makeExtension(**kwargs):  # pragma: no cover
    """ Return a new `SmartyExtension` built from the given keyword arguments. """
    extension = SmartyExtension(**kwargs)
    return extension