from typing import Any, ClassVar, Dict, Iterable, Literal, Optional, Tuple, Union

from ..core import BaseRenderer, BlockState
from ..util import escape as escape_text
from ..util import safe_entity, striptags
5
6
class HTMLRenderer(BaseRenderer):
    """A renderer for converting Markdown to HTML.

    Every ``render_token`` call dispatches to the method named after the
    token type; each of those methods returns an HTML fragment as ``str``.

    :param escape: when true (the default), plain text, inline HTML and
        block HTML are escaped instead of being emitted verbatim.
    :param allow_harmful_protocols: controls URL filtering in
        :meth:`safe_url`. ``None``/``False`` rejects every URL that starts
        with one of ``HARMFUL_PROTOCOLS`` (except ``GOOD_DATA_PROTOCOLS``);
        ``True`` disables the filter; a collection of prefixes (or a single
        prefix string) additionally allows URLs starting with them.
    """

    _escape: bool
    _allow_harmful_protocols: Union[bool, Iterable[str], None]
    NAME: ClassVar[Literal["html"]] = "html"
    # URL prefixes that are replaced by "#harmful-link" unless allowed.
    HARMFUL_PROTOCOLS: ClassVar[Tuple[str, ...]] = (
        "javascript:",
        "vbscript:",
        "file:",
        "data:",
    )
    # "data:" prefixes that are accepted even though "data:" is harmful.
    GOOD_DATA_PROTOCOLS: ClassVar[Tuple[str, ...]] = (
        "data:image/gif;",
        "data:image/png;",
        "data:image/jpeg;",
        "data:image/webp;",
    )

    def __init__(
        self,
        escape: bool = True,
        allow_harmful_protocols: Union[bool, Iterable[str], None] = None,
    ) -> None:
        super().__init__()
        self._allow_harmful_protocols = allow_harmful_protocols
        self._escape = escape

    def render_token(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a single token by calling the method for its ``type``.

        The handler receives the token's ``raw`` text if present, otherwise
        its rendered ``children`` if present, otherwise no positional
        argument. Non-empty ``attrs`` are passed as keyword arguments.
        """
        # backward compatible with v2
        func = self._get_method(token["type"])
        # A missing or empty "attrs" expands to no keyword arguments.
        attrs = token.get("attrs") or {}

        if "raw" in token:
            return func(token["raw"], **attrs)
        if "children" in token:
            return func(self.render_tokens(token["children"], state), **attrs)
        return func(**attrs)

    def safe_url(self, url: str) -> str:
        """Ensure the given URL is safe. This method is used for rendering
        links, images, and etc.

        Returns the escaped URL, or ``"#harmful-link"`` when the URL starts
        with a harmful protocol that has not been explicitly allowed.
        Matching is case-insensitive.
        """
        allowed = self._allow_harmful_protocols
        if allowed is True:
            return escape_text(url)

        _url = url.lower()
        if allowed and not isinstance(allowed, bool):
            # A bare string would otherwise be exploded into single
            # characters by tuple(), whitelisting almost any URL.
            if isinstance(allowed, str):
                allowed = (allowed,)
            # The URL was lower-cased above, so the prefixes must be too.
            prefixes = tuple(prefix.lower() for prefix in allowed)
            if _url.startswith(prefixes):
                return escape_text(url)

        if _url.startswith(self.HARMFUL_PROTOCOLS) and not _url.startswith(self.GOOD_DATA_PROTOCOLS):
            return "#harmful-link"
        return escape_text(url)

    def text(self, text: str) -> str:
        """Render plain text: escaped when escaping is enabled, otherwise
        passed through ``safe_entity``."""
        if self._escape:
            return escape_text(text)
        return safe_entity(text)

    def emphasis(self, text: str) -> str:
        """Wrap already-rendered ``text`` in ``<em>``."""
        return "<em>" + text + "</em>"

    def strong(self, text: str) -> str:
        """Wrap already-rendered ``text`` in ``<strong>``."""
        return "<strong>" + text + "</strong>"

    def link(self, text: str, url: str, title: Optional[str] = None) -> str:
        """Render an ``<a>`` element; ``url`` is filtered by :meth:`safe_url`
        and a non-empty ``title`` becomes the ``title`` attribute."""
        s = '<a href="' + self.safe_url(url) + '"'
        if title:
            s += ' title="' + safe_entity(title) + '"'
        return s + ">" + text + "</a>"

    def image(self, text: str, url: str, title: Optional[str] = None) -> str:
        """Render an ``<img />`` element.

        ``text`` is the rendered alt content; tags are stripped from it and
        the remainder is escaped before it is used as the ``alt`` attribute.
        """
        src = self.safe_url(url)
        alt = escape_text(striptags(text))
        s = '<img src="' + src + '" alt="' + alt + '"'
        if title:
            s += ' title="' + safe_entity(title) + '"'
        return s + " />"

    def codespan(self, text: str) -> str:
        """Render inline code; the content is always escaped."""
        return "<code>" + escape_text(text) + "</code>"

    def linebreak(self) -> str:
        """Render a hard line break."""
        return "<br />\n"

    def softbreak(self) -> str:
        """Render a soft line break as a plain newline."""
        return "\n"

    def inline_html(self, html: str) -> str:
        """Render raw inline HTML: escaped when escaping is enabled,
        otherwise emitted verbatim."""
        if self._escape:
            return escape_text(html)
        return html

    def paragraph(self, text: str) -> str:
        """Wrap already-rendered ``text`` in ``<p>``."""
        return "<p>" + text + "</p>\n"

    def heading(self, text: str, level: int, **attrs: Any) -> str:
        """Render an ``<hN>`` element for the given ``level``.

        Only the optional ``id`` attribute is read from ``attrs``.
        """
        tag = "h" + str(level)
        html = "<" + tag
        _id = attrs.get("id")
        if _id:
            # Escape the id like the other attribute values in this class
            # (title, code language) so a quote cannot break the markup.
            html += ' id="' + safe_entity(_id) + '"'
        return html + ">" + text + "</" + tag + ">\n"

    def blank_line(self) -> str:
        """Blank lines produce no output."""
        return ""

    def thematic_break(self) -> str:
        """Render a horizontal rule."""
        return "<hr />\n"

    def block_text(self, text: str) -> str:
        """Return already-rendered block text unchanged."""
        return text

    def block_code(self, code: str, info: Optional[str] = None) -> str:
        """Render a code block; ``code`` is always escaped.

        When ``info`` is non-blank, its first whitespace-separated word is
        used as the language in a ``language-<lang>`` class.
        """
        html = "<pre><code"
        if info is not None:
            info = safe_entity(info.strip())
        if info:
            lang = info.split(None, 1)[0]
            html += ' class="language-' + lang + '"'
        return html + ">" + escape_text(code) + "</code></pre>\n"

    def block_quote(self, text: str) -> str:
        """Wrap already-rendered ``text`` in ``<blockquote>``."""
        return "<blockquote>\n" + text + "</blockquote>\n"

    def block_html(self, html: str) -> str:
        """Render a raw HTML block.

        With escaping enabled the stripped HTML is escaped and wrapped in a
        paragraph; otherwise it is emitted verbatim followed by a newline.
        """
        if self._escape:
            return "<p>" + escape_text(html.strip()) + "</p>\n"
        return html + "\n"

    def block_error(self, text: str) -> str:
        """Wrap ``text`` in an error ``<div>``; ``text`` is not escaped here."""
        return '<div class="error"><pre>' + text + "</pre></div>\n"

    # NOTE: the method name shadows the builtin ``list`` inside the class
    # body; it is kept because the name is looked up from the token type.
    def list(self, text: str, ordered: bool, **attrs: Any) -> str:
        """Render ``<ol>`` (with an optional ``start`` attribute taken from
        ``attrs``) when ``ordered`` is true, otherwise ``<ul>``."""
        if ordered:
            html = "<ol"
            start = attrs.get("start")
            if start is not None:
                html += ' start="' + str(start) + '"'
            return html + ">\n" + text + "</ol>\n"
        return "<ul>\n" + text + "</ul>\n"

    def list_item(self, text: str) -> str:
        """Wrap already-rendered ``text`` in ``<li>``."""
        return "<li>" + text + "</li>\n"