Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/filters/strings.py: 51%

100 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1"""String filters. 

2 

3Contains a collection of useful string manipulation filters for use in Jinja 

4templates. 

5""" 

6 

7# Copyright (c) IPython Development Team. 

8# Distributed under the terms of the Modified BSD License. 

9 

10import base64 

11import os 

12import re 

13import textwrap 

14import warnings 

15from urllib.parse import quote 

16from xml.etree.ElementTree import Element 

17 

18import bleach 

19 

20# defusedxml does safe(r) parsing of untrusted XML data 

21from defusedxml import ElementTree # type:ignore 

22 

23from nbconvert.preprocessors.sanitize import _get_default_css_sanitizer 

24 

25__all__ = [ 

26 "wrap_text", 

27 "html2text", 

28 "clean_html", 

29 "add_anchor", 

30 "strip_dollars", 

31 "strip_files_prefix", 

32 "comment_lines", 

33 "get_lines", 

34 "ipython2python", 

35 "posix_path", 

36 "path2url", 

37 "add_prompts", 

38 "ascii_only", 

39 "prevent_list_blocks", 

40 "strip_trailing_newline", 

41 "text_base64", 

42] 

43 

44from nbconvert.filters.svg_constants import ALLOWED_SVG_ATTRIBUTES, ALLOWED_SVG_TAGS 

45 

46 

def wrap_text(text, width=100):
    """
    Wrap text to a maximum width while keeping the existing line breaks.

    Every line of the input is wrapped on its own with ``textwrap.wrap``,
    so words are not broken if possible.

    Parameters
    ----------
    text : str
        Text to wrap.
    width : int, optional
        Number of characters to wrap to, default 100.

    Returns
    -------
    str
        The wrapped lines joined with newline characters.
    """
    wrapped_lines = []
    for line in text.split("\n"):
        # An empty line wraps to an empty list, which joins back to "".
        pieces = textwrap.wrap(line, width)
        wrapped_lines.append("\n".join(pieces))
    return "\n".join(wrapped_lines)

64 

65 

def html2text(element):
    """Extract the inner text from html.

    Analog of jQuery's $(element).text()

    Parameters
    ----------
    element : str or Element
        Markup string to parse, or an already parsed element.

    Returns
    -------
    str
        The element's text, the text of all of its descendants and the
        element's tail, concatenated in document order. A string that
        cannot be parsed is returned unmodified.
    """
    if isinstance(element, str):
        try:
            element = ElementTree.fromstring(element)
        except Exception:
            # Not parseable markup: hand the input back untouched.
            return element

    pieces = [element.text or ""]
    # Each recursive call also contributes the child's tail text.
    pieces.extend(html2text(child) for child in element)
    pieces.append(element.tail or "")
    return "".join(pieces)

83 

84 

def clean_html(element):
    """Clean an html element with ``bleach.clean``.

    Parameters
    ----------
    element : bytes or object
        Markup to clean. Bytes are decoded; anything else is converted
        with ``str``.

    Returns
    -------
    The value returned by ``bleach.clean`` for the given markup, called
    with bleach's default tags and attributes extended by the allowed SVG
    tags and attributes, a few extra tags, and ``class``/``id`` on every
    tag.
    """
    if isinstance(element, bytes):
        markup = element.decode()
    else:
        markup = str(element)

    # Only forward a css sanitizer when one is available.
    extra_options = {}
    sanitizer = _get_default_css_sanitizer()
    if sanitizer:
        extra_options['css_sanitizer'] = sanitizer

    allowed_tags = [*bleach.ALLOWED_TAGS, *ALLOWED_SVG_TAGS, "div", "pre", "code", "span"]
    allowed_attributes = {
        **bleach.ALLOWED_ATTRIBUTES,
        **{svg_tag: list(ALLOWED_SVG_ATTRIBUTES) for svg_tag in ALLOWED_SVG_TAGS},
        "*": ["class", "id"],
    }

    return bleach.clean(
        markup,
        tags=allowed_tags,
        strip_comments=False,
        attributes=allowed_attributes,
        **extra_options,
    )

103 

104 

def _convert_header_id(header_contents):
    """Turn header contents into a value usable as an id.

    Takes a string as input and returns a string. For use on markdown
    headings.

    Note: this may be subject to change in the case of changes to how we
    wish to generate ids.
    """
    # An id must be non-empty and free of spaces, so spaces become dashes.
    dashed = header_contents.replace(" ", "-")
    # The id is also used as a URL fragment, which is more restrictive:
    # percent-encode every character that is not valid in a fragment.
    return quote(dashed, safe="?/:@!$&'()*+,;=")

116 

117 

def add_anchor(html, anchor_link_text="¶"):
    """Add an id and an anchor-link to an html header.

    For use on markdown headings.

    Parameters
    ----------
    html : str
        Markup of the header.
    anchor_link_text : str, optional
        Content of the anchor link; either plain text or markup
        (e.g. an image).

    Returns
    -------
    str
        The serialized header with the id set and the link appended, or
        ``html`` unmodified when it cannot be parsed.
    """
    try:
        header = ElementTree.fromstring(html)
    except Exception:
        # Unparseable input is passed through untouched.
        return html

    anchor_id = _convert_header_id(html2text(header))
    header.set("id", anchor_id)

    anchor = Element("a", {"class": "anchor-link", "href": "#" + anchor_id})
    try:
        # The link text may itself be markup (e.g. an image).
        anchor.append(ElementTree.fromstring(anchor_link_text))
    except Exception:
        # Otherwise treat it as regular text.
        anchor.text = anchor_link_text
    header.append(anchor)

    serialized = ElementTree.tostring(header)
    return serialized.decode(encoding="utf-8")

140 

141 

def add_prompts(code, first=">>> ", cont="... "):
    """Add prompts to code snippets.

    The first line is prefixed with ``first`` and every following line
    with ``cont``.
    """
    # str.split always yields at least one item, so the unpacking is safe.
    head, *rest = code.split("\n")
    prompted = [first + head]
    prompted.extend(cont + line for line in rest)
    return "\n".join(prompted)

150 

151 

def strip_dollars(text):
    """
    Strip dollar symbols from both ends of text.

    Dollar symbols inside the text are kept.

    Parameters
    ----------
    text : str
        Text to strip the leading and trailing dollars from.
    """
    stripped = text.strip("$")
    return stripped

163 

164 

# An html ``src``/``href`` attribute whose value starts with ``files/`` or ``/files/``.
files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')
# A markdown link or image whose target starts with ``files/`` or ``/files/``.
markdown_url_pattern = re.compile(r"(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)")


def strip_files_prefix(text):
    """
    Drop the fake ``files/`` prefix from URLs and paths.

    Handles both html attributes (``src``/``href``) and markdown links
    or images.

    Parameters
    ----------
    text : str
        Text in which to replace 'src="files/real...' with 'src="real...'

    Returns
    -------
    str
        The text with every matched ``files/`` prefix removed.
    """
    without_html_prefix = files_url_pattern.sub(r"\1=\2", text)
    return markdown_url_pattern.sub(r"\1[\2](\3)", without_html_prefix)

182 

183 

def comment_lines(text, prefix="# "):
    """
    Comment out text by prefixing each of its lines.

    Parameters
    ----------
    text : str
        Text to comment out.
    prefix : str
        String to prepend to the start of each line.

    Returns
    -------
    str
        The text with ``prefix`` in front of every line, the first one
        included.
    """
    commented = [prefix + line for line in text.split("\n")]
    return "\n".join(commented)

200 

201 

def get_lines(text, start=None, end=None):
    """
    Return a selection of the lines of the input text.

    Parameters
    ----------
    text : str
        Text to parse lines from.
    start : int, optional
        Index of the first line to keep.
    end : int, optional
        Index at which to stop; the line at this index is not included.

    Returns
    -------
    str
        The selected lines joined with newline characters.
    """
    wanted = slice(start, end)
    selected = text.split("\n")[wanted]
    return "\n".join(selected)

222 

223 

def ipython2python(code):
    """Transform IPython syntax to pure Python syntax.

    When IPython cannot be imported, a warning is issued and the code is
    returned unchanged.

    Parameters
    ----------
    code : str
        IPython code, to be transformed to pure Python
    """
    try:
        from IPython.core.inputtransformer2 import TransformerManager
    except ImportError:
        message = (
            "IPython is needed to transform IPython syntax to pure Python."
            " Install ipython if you need this functionality."
        )
        warnings.warn(message, stacklevel=2)
        return code

    transformer = TransformerManager()
    return transformer.transform_cell(code)

244 

245 

def posix_path(path):
    """Turn a path into posix-style path/to/etc

    Mainly for use in latex on Windows,
    where native Windows paths are not allowed.
    """
    separator = os.path.sep
    if separator == "/":
        # Already posix-style on this platform.
        return path
    return path.replace(separator, "/")

255 

256 

def path2url(path):
    """Turn a file path into a URL.

    The path is split on the platform's separator, each part is
    percent-encoded and the parts are joined with ``/``.
    """
    quoted_parts = map(quote, path.split(os.path.sep))
    return "/".join(quoted_parts)

261 

262 

def ascii_only(s):
    """Ensure a string is ascii.

    Characters that cannot be encoded as ascii are substituted by the
    ``replace`` error handler.
    """
    encoded = s.encode("ascii", "replace")
    return encoded.decode("ascii")

266 

267 

def prevent_list_blocks(s):
    """
    Prevent presence of enumerate or itemize blocks in latex headings cells.

    A list marker at the very start of the string (after optional
    whitespace) gets a backslash put in front of it: the dot of a
    numbered item, or a dash, plus or star bullet.
    """
    # (pattern, replacement) pairs, applied one after the other.
    substitutions = (
        (r"(^\s*\d*)\.", r"\1\."),
        (r"(^\s*)\-", r"\1\-"),
        (r"(^\s*)\+", r"\1\+"),
        (r"(^\s*)\*", r"\1\*"),
    )
    out = s
    for pattern, replacement in substitutions:
        out = re.sub(pattern, replacement, out)
    return out

277 

278 

def strip_trailing_newline(text):
    """
    Strip a single newline from the end of text, if there is one.
    """
    return text[:-1] if text.endswith("\n") else text

286 

287 

def text_base64(text):
    """
    Encode text as base64.

    The text is encoded to bytes, base64-encoded, and the result is
    decoded back into a string.
    """
    raw = text.encode()
    encoded = base64.b64encode(raw)
    return encoded.decode()