1# Fenced Code Extension for Python Markdown
2# =========================================
3
4# This extension adds Fenced Code Blocks to Python-Markdown.
5
6# See https://Python-Markdown.github.io/extensions/fenced_code_blocks
7# for documentation.
8
9# Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).
10
11# All changes Copyright 2008-2014 The Python Markdown Project
12
13# License: [BSD](https://opensource.org/licenses/bsd-license.php)
14
15"""
16This extension adds Fenced Code Blocks to Python-Markdown.
17
18See the [documentation](https://Python-Markdown.github.io/extensions/fenced_code_blocks)
19for details.
20"""
21
22from __future__ import annotations
23
24from textwrap import dedent
25from . import Extension
26from ..preprocessors import Preprocessor
27from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines
28from .attr_list import get_attrs_and_remainder, AttrListExtension
29from ..util import parseBoolValue
30from ..serializers import _escape_attrib_html
31import re
32from typing import TYPE_CHECKING, Any, Iterable
33
34if TYPE_CHECKING: # pragma: no cover
35 from markdown import Markdown
36
37
class FencedCodeExtension(Extension):
    """ Markdown extension which adds support for fenced code blocks. """

    def __init__(self, **kwargs):
        """ Declare the default options, then hand `kwargs` to the base `Extension` initializer. """
        # Default configuration options.
        # Each option name maps to a `[default value, description]` pair.
        self.config = {
            'lang_prefix': ['language-', 'Prefix prepended to the language. Default: "language-"']
        }
        super().__init__(**kwargs)

    def extendMarkdown(self, md):
        """ Register this extension and a `FencedBlockPreprocessor` on the Markdown instance. """
        md.registerExtension(self)

        # The preprocessor is registered under the name `fenced_code_block` with priority 25.
        preprocessor = FencedBlockPreprocessor(md, self.getConfigs())
        md.preprocessors.register(preprocessor, 'fenced_code_block', 25)
51
52
class FencedBlockPreprocessor(Preprocessor):
    """ Find and extract fenced code blocks. """

    # A fence is three or more backticks or tildes at the start of a line. It may
    # be followed by either a `{...}` attribute list, or by a bare language name
    # and/or a legacy `hl_lines="..."` option. The block is closed by the first
    # following line that consists of the identical fence string.
    FENCED_BLOCK_RE = re.compile(
        dedent(r'''
            (?P<fence>^(?:~{3,}|`{3,}))[ ]*                          # opening fence
            ((\{(?P<attrs>[^\n]*)\})|                                # (optional {attrs} or
            (\.?(?P<lang>[\w#.+-]*)[ ]*)?                            # optional (.)lang
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?) # optional hl_lines)
            \n                                                       # newline (end of opening fence)
            (?P<code>.*?)(?<=\n)                                     # the code block
            (?P=fence)[ ]*$                                          # closing fence
        '''),
        re.MULTILINE | re.DOTALL | re.VERBOSE
    )

    def __init__(self, md: Markdown, config: dict[str, Any]):
        """
        Initialize the preprocessor.

        Arguments:
            md: The Markdown instance, passed on to the base `Preprocessor`.
            config: Configuration of the fenced code extension. Only the
                `lang_prefix` key is read by this class.
        """
        super().__init__(md)
        self.config = config
        # The other registered extensions are inspected lazily, on the first call to `run`.
        self.checked_for_deps = False
        self.codehilite_conf: dict[str, Any] = {}
        self.use_attr_list = False
        # List of options to convert to boolean values
        self.bool_options = [
            'linenums',
            'guess_lang',
            'noclasses',
            'use_pygments'
        ]

    def run(self, lines: list[str]) -> list[str]:
        """
        Match and store Fenced Code Blocks in the `HtmlStash`.

        Each fenced block found in `lines` is rendered to HTML, stored in
        `self.md.htmlStash`, and replaced in the text by the placeholder which
        the stash returns.

        Arguments:
            lines: The lines of the document.

        Returns:
            The lines of the document with each fenced block replaced by a placeholder.
        """

        # Check for dependent extensions
        if not self.checked_for_deps:
            for ext in self.md.registeredExtensions:
                if isinstance(ext, CodeHiliteExtension):
                    self.codehilite_conf = ext.getConfigs()
                if isinstance(ext, AttrListExtension):
                    self.use_attr_list = True

            self.checked_for_deps = True

        text = "\n".join(lines)
        index = 0
        while True:
            m = self.FENCED_BLOCK_RE.search(text, index)
            if not m:
                break

            lang, elem_id, classes, config = None, '', [], {}
            if m.group('attrs'):
                attrs, remainder = get_attrs_and_remainder(m.group('attrs'))
                if remainder:  # Does not have correctly matching curly braces, so the syntax is invalid.
                    index = m.end('attrs')  # Explicitly skip over this, to prevent an infinite loop.
                    continue
                elem_id, classes, config = self.handle_attrs(attrs)
                if classes:
                    # The first class listed is taken to be the language.
                    lang = classes.pop(0)
            else:
                if m.group('lang'):
                    lang = m.group('lang')
                if m.group('hl_lines'):
                    # Support `hl_lines` outside of `attrs` for backward-compatibility
                    config['hl_lines'] = parse_hl_lines(m.group('hl_lines'))

            # Highlight with `CodeHilite` only when the `codehilite` extension is
            # registered (`codehilite_conf` is not empty), it has `use_pygments`
            # enabled, and this block did not set `use_pygments` to a falsy value.
            if self.codehilite_conf and self.codehilite_conf['use_pygments'] and config.get('use_pygments', True):
                local_config = self.codehilite_conf.copy()
                local_config.update(config)
                # Combine classes with `cssclass`. Ensure `cssclass` is at end
                # as Pygments appends a suffix under certain circumstances.
                # Ignore ID as Pygments does not offer an option to set it.
                if classes:
                    local_config['css_class'] = '{} {}'.format(
                        ' '.join(classes),
                        local_config['css_class']
                    )
                highliter = CodeHilite(
                    m.group('code'),
                    lang=lang,
                    # `pygments_style` is popped so it is not passed a second time via `**local_config`.
                    style=local_config.pop('pygments_style', 'default'),
                    **local_config
                )

                code = highliter.hilite(shebang=False)
            else:
                id_attr = lang_attr = class_attr = kv_pairs = ''
                if lang:
                    prefix = self.config.get('lang_prefix', 'language-')
                    lang_attr = f' class="{prefix}{_escape_attrib_html(lang)}"'
                if classes:
                    class_attr = f' class="{_escape_attrib_html(" ".join(classes))}"'
                if elem_id:
                    id_attr = f' id="{_escape_attrib_html(elem_id)}"'
                if self.use_attr_list and config and not config.get('use_pygments', False):
                    # Only assign key/value pairs to code element if `attr_list` extension is enabled, key/value
                    # pairs were defined on the code block, and the `use_pygments` key was not set to `True`. The
                    # `use_pygments` key could be either set to `False` or not defined. It is omitted from output.
                    kv_pairs = ''.join(
                        f' {k}="{_escape_attrib_html(v)}"' for k, v in config.items() if k != 'use_pygments'
                    )
                # The ID and the remaining classes go on `<pre>`; the language class
                # and the key/value pairs go on `<code>`.
                code = self._escape(m.group('code'))
                code = f'<pre{id_attr}{class_attr}><code{lang_attr}{kv_pairs}>{code}</code></pre>'

            placeholder = self.md.htmlStash.store(code)
            text = f'{text[:m.start()]}\n{placeholder}\n{text[m.end():]}'
            # Continue from after the replaced text in the next iteration.
            index = m.start() + 1 + len(placeholder)

        return text.split("\n")

    def handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str], dict[str, Any]]:
        """
        Return tuple: `(id, [list, of, classes], {configs})`

        Arguments:
            attrs: `(key, value)` pairs. The key `id` sets the ID and the key `.`
                adds a class. An `hl_lines` value is passed through
                `parse_hl_lines` and the values of the keys in
                `self.bool_options` through `parseBoolValue`. Every other pair
                is stored in the configs unchanged.
        """
        elem_id = ''
        classes = []
        configs = {}
        for k, v in attrs:
            if k == 'id':
                elem_id = v
            elif k == '.':
                classes.append(v)
            elif k == 'hl_lines':
                configs[k] = parse_hl_lines(v)
            elif k in self.bool_options:
                configs[k] = parseBoolValue(v, fail_on_errors=False, preserve_none=True)
            else:
                configs[k] = v
        return elem_id, classes, configs

    def _escape(self, txt: str) -> str:
        """
        Basic HTML escaping.

        Replaces `&`, `<`, `>` and `"` in `txt` with the corresponding HTML
        entities so that the code is displayed literally rather than being
        interpreted as markup.
        """
        # `&` must be handled first; otherwise the ampersands introduced by the
        # other entities would themselves be escaped.
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        txt = txt.replace('"', '&quot;')
        return txt
190
191
def makeExtension(**kwargs):  # pragma: no cover
    """ Return a `FencedCodeExtension` instance constructed with `kwargs`. """
    extension = FencedCodeExtension(**kwargs)
    return extension