Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown/serializers.py: 71%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

97 statements  

1# Add x/html serialization to `ElementTree`

2# Taken from ElementTree 1.3 preview with slight modifications 

3# 

4# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. 

5# 

6# fredrik@pythonware.com 

7# https://www.pythonware.com/ 

8# 

9# -------------------------------------------------------------------- 

10# The ElementTree toolkit is 

11# 

12# Copyright (c) 1999-2007 by Fredrik Lundh 

13# 

14# By obtaining, using, and/or copying this software and/or its 

15# associated documentation, you agree that you have read, understood, 

16# and will comply with the following terms and conditions: 

17# 

18# Permission to use, copy, modify, and distribute this software and 

19# its associated documentation for any purpose and without fee is 

20# hereby granted, provided that the above copyright notice appears in 

21# all copies, and that both that copyright notice and this permission 

22# notice appear in supporting documentation, and that the name of 

23# Secret Labs AB or the author not be used in advertising or publicity 

24# pertaining to distribution of the software without specific, written 

25# prior permission. 

26# 

27# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 

28# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 

29# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 

30# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 

31# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 

32# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

33# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 

34# OF THIS SOFTWARE. 

35# -------------------------------------------------------------------- 

36 

37""" 

38Python-Markdown provides two serializers which render [`ElementTree.Element`][xml.etree.ElementTree.Element] 

39objects to a string of HTML. Both functions wrap the same underlying code with only a few minor 

40differences as outlined below: 

41 

421. Empty (self-closing) tags are rendered as `<tag>` for HTML and as `<tag />` for XHTML. 

432. Boolean attributes are rendered as `attrname` for HTML and as `attrname="attrname"` for XHTML. 

44""" 

45 

46from __future__ import annotations 

47 

48from xml.etree.ElementTree import ProcessingInstruction 

49from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY 

50import re 

51from typing import Callable, Literal, NoReturn 

52 

53__all__ = ['to_html_string', 'to_xhtml_string'] 

54 

# Matches an ampersand that does NOT start a character reference, i.e. one that
# is not followed by `#<digits>;`, `#x<hex digits>;` or `<alphanumerics>;`.
# Matching is case-insensitive.
RE_AMP = re.compile(
    r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)',
    re.IGNORECASE,
)

56 

57 

58def _raise_serialization_error(text: str) -> NoReturn: # pragma: no cover 

59 raise TypeError( 

60 "cannot serialize {!r} (type {})".format(text, type(text).__name__) 

61 ) 

62 

63 

64def _escape_cdata(text) -> str: 

65 # escape character data 

66 try: 

67 # it's worth avoiding do-nothing calls for strings that are 

68 # shorter than 500 character, or so. assume that's, by far, 

69 # the most common case in most applications. 

70 if "&" in text: 

71 # Only replace & when not part of an entity 

72 text = RE_AMP.sub('&amp;', text) 

73 if "<" in text: 

74 text = text.replace("<", "&lt;") 

75 if ">" in text: 

76 text = text.replace(">", "&gt;") 

77 return text 

78 except (TypeError, AttributeError): # pragma: no cover 

79 _raise_serialization_error(text) 

80 

81 

82def _escape_attrib(text: str) -> str: 

83 # escape attribute value 

84 try: 

85 if "&" in text: 

86 # Only replace & when not part of an entity 

87 text = RE_AMP.sub('&amp;', text) 

88 if "<" in text: 

89 text = text.replace("<", "&lt;") 

90 if ">" in text: 

91 text = text.replace(">", "&gt;") 

92 if "\"" in text: 

93 text = text.replace("\"", "&quot;") 

94 if "\n" in text: 

95 text = text.replace("\n", "&#10;") 

96 return text 

97 except (TypeError, AttributeError): # pragma: no cover 

98 _raise_serialization_error(text) 

99 

100 

101def _escape_attrib_html(text: str) -> str: 

102 # escape attribute value 

103 try: 

104 if "&" in text: 

105 # Only replace & when not part of an entity 

106 text = RE_AMP.sub('&amp;', text) 

107 if "<" in text: 

108 text = text.replace("<", "&lt;") 

109 if ">" in text: 

110 text = text.replace(">", "&gt;") 

111 if "\"" in text: 

112 text = text.replace("\"", "&quot;") 

113 return text 

114 except (TypeError, AttributeError): # pragma: no cover 

115 _raise_serialization_error(text) 

116 

117 

118def _serialize_html(write: Callable[[str], None], elem: Element, format: Literal["html", "xhtml"]) -> None: 

119 tag = elem.tag 

120 text = elem.text 

121 if tag is Comment: 

122 write("<!--%s-->" % _escape_cdata(text)) 

123 elif tag is ProcessingInstruction: 

124 write("<?%s?>" % _escape_cdata(text)) 

125 elif tag is None: 

126 if text: 

127 write(_escape_cdata(text)) 

128 for e in elem: 

129 _serialize_html(write, e, format) 

130 else: 

131 namespace_uri = None 

132 if isinstance(tag, QName): 

133 # `QNAME` objects store their data as a string: `{uri}tag` 

134 if tag.text[:1] == "{": 

135 namespace_uri, tag = tag.text[1:].split("}", 1) 

136 else: 

137 raise ValueError('QName objects must define a tag.') 

138 write("<" + tag) 

139 items = elem.items() 

140 if items: 

141 items = sorted(items) # lexical order 

142 for k, v in items: 

143 if isinstance(k, QName): 

144 # Assume a text only `QName` 

145 k = k.text 

146 if isinstance(v, QName): 

147 # Assume a text only `QName` 

148 v = v.text 

149 else: 

150 v = _escape_attrib_html(v) 

151 if k == v and format == 'html': 

152 # handle boolean attributes 

153 write(" %s" % v) 

154 else: 

155 write(' {}="{}"'.format(k, v)) 

156 if namespace_uri: 

157 write(' xmlns="%s"' % (_escape_attrib(namespace_uri))) 

158 if format == "xhtml" and tag.lower() in HTML_EMPTY: 

159 write(" />") 

160 else: 

161 write(">") 

162 if text: 

163 if tag.lower() in ["script", "style"]: 

164 write(text) 

165 else: 

166 write(_escape_cdata(text)) 

167 for e in elem: 

168 _serialize_html(write, e, format) 

169 if tag.lower() not in HTML_EMPTY: 

170 write("</" + tag + ">") 

171 if elem.tail: 

172 write(_escape_cdata(elem.tail)) 

173 

174 

175def _write_html(root: Element, format: Literal["html", "xhtml"] = "html") -> str: 

176 assert root is not None 

177 data: list[str] = [] 

178 write = data.append 

179 _serialize_html(write, root, format) 

180 return "".join(data) 

181 

182 

183# -------------------------------------------------------------------- 

184# public functions 

185 

186 

def to_html_string(element: Element) -> str:
    """ Return `element` and its children serialized as a string of HTML5. """
    root = ElementTree(element).getroot()
    return _write_html(root, format="html")

190 

191 

def to_xhtml_string(element: Element) -> str:
    """ Return `element` and its children serialized as a string of XHTML. """
    root = ElementTree(element).getroot()
    return _write_html(root, format="xhtml")