Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/filters/strings.py: 51%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

100 statements  

1"""String filters. 

2 

3Contains a collection of useful string manipulation filters for use in Jinja 

4templates. 

5""" 

6 

7# Copyright (c) IPython Development Team. 

8# Distributed under the terms of the Modified BSD License. 

9 

10import base64 

11import os 

12import re 

13import textwrap 

14import warnings 

15from urllib.parse import quote 

16from xml.etree.ElementTree import Element 

17 

18import bleach 

19 

20# defusedxml does safe(r) parsing of untrusted XML data 

21from defusedxml import ElementTree # type:ignore[import-untyped] 

22 

23from nbconvert.preprocessors.sanitize import _get_default_css_sanitizer 

24 

# Names exported by this module when it is star-imported.
__all__ = [
    "wrap_text",
    "html2text",
    "clean_html",
    "add_anchor",
    "strip_dollars",
    "strip_files_prefix",
    "comment_lines",
    "get_lines",
    "ipython2python",
    "posix_path",
    "path2url",
    "add_prompts",
    "ascii_only",
    "prevent_list_blocks",
    "strip_trailing_newline",
    "text_base64",
]

43 

44 

def wrap_text(text, width=100):
    """
    Wrap each line of ``text`` to ``width`` columns.

    The input is split on newlines, every line is wrapped independently with
    ``textwrap.wrap``, and the results are joined back with newlines.

    Parameters
    ----------
    text : str
        Text to wrap.
    width : int, optional
        Number of characters to wrap to, default 100.

    Returns
    -------
    str
        The wrapped text.
    """
    wrapped_lines = []
    for line in text.split("\n"):
        # textwrap.wrap gives a list of pieces; a blank line yields no
        # pieces and therefore stays an empty line after joining.
        pieces = textwrap.wrap(line, width)
        wrapped_lines.append("\n".join(pieces))
    return "\n".join(wrapped_lines)

62 

63 

def html2text(element):
    """Extract the inner text of an HTML/XML element.

    Analog of jQuery's $(element).text()

    Parameters
    ----------
    element : str or element
        Either markup to parse, or an already-parsed element exposing
        ``text``, ``tail`` and iteration over its children.

    Returns
    -------
    str
        The element's text, followed by the text of each child in document
        order, followed by the element's tail. A string that cannot be
        parsed is returned unmodified.
    """
    if isinstance(element, str):
        try:
            element = ElementTree.fromstring(element)
        except Exception:
            # failed to parse, just return it unmodified
            return element

    # Collect the pieces in document order and join once at the end.
    pieces = [element.text or ""]
    for child in element:
        pieces.append(html2text(child))
    pieces.append(element.tail or "")
    return "".join(pieces)

81 

82 

def clean_html(element):
    """Sanitize a piece of HTML with ``bleach.clean``.

    Parameters
    ----------
    element : bytes or object
        Markup to clean. ``bytes`` are decoded; anything else is converted
        with ``str()``.

    Returns
    -------
    The result of ``bleach.clean`` on the markup, allowing bleach's default
    tags plus a few structural ones, and bleach's default attributes plus
    ``class`` and ``id`` under the ``"*"`` key.
    """
    if isinstance(element, bytes):
        markup = element.decode()
    else:
        markup = str(element)

    # Only hand a CSS sanitizer to bleach when the helper provides one.
    css_sanitizer = _get_default_css_sanitizer()
    extra_options = {"css_sanitizer": css_sanitizer} if css_sanitizer else {}

    allowed_tags = [*bleach.ALLOWED_TAGS, "div", "pre", "code", "span", "table", "tr", "td"]
    allowed_attributes = {
        **bleach.ALLOWED_ATTRIBUTES,
        "*": ["class", "id"],
    }
    return bleach.clean(
        markup,
        tags=allowed_tags,
        attributes=allowed_attributes,
        **extra_options,
    )

99 

100 

101def _convert_header_id(header_contents): 

102 """Convert header contents to valid id value. Takes string as input, returns string. 

103 

104 Note: this may be subject to change in the case of changes to how we wish to generate ids. 

105 

106 For use on markdown headings. 

107 """ 

108 # Valid IDs need to be non-empty and contain no space characters, but are otherwise arbitrary. 

109 # However, these IDs are also used in URL fragments, which are more restrictive, so we URL 

110 # encode any characters that are not valid in URL fragments. 

111 return quote(header_contents.replace(" ", "-"), safe="?/:@!$&'()*+,;=") 

112 

113 

def add_anchor(html, anchor_link_text="¶"):
    """Give an HTML header an ``id`` and append an anchor link to it.

    For use on markdown headings.

    Parameters
    ----------
    html : str
        Markup of the header element.
    anchor_link_text : str, optional
        Content of the anchor link; either plain text or markup
        (e.g. an image). Defaults to the pilcrow sign.

    Returns
    -------
    str
        The serialized header with the id and the trailing
        ``<a class="anchor-link">`` element, or ``html`` unmodified when it
        cannot be parsed.
    """
    try:
        header = ElementTree.fromstring(html)
    except Exception:
        # failed to parse, just return it unmodified
        return html

    # The id is derived from the header's text content.
    header_id = _convert_header_id(html2text(header))
    header.set("id", header_id)

    anchor = Element("a", {"class": "anchor-link", "href": "#" + header_id})
    try:
        # Test if the anchor link text is HTML (e.g. an image)
        parsed_link_content = ElementTree.fromstring(anchor_link_text)
        anchor.append(parsed_link_content)
    except Exception:
        # If we fail to parse, assume we've just got regular text
        anchor.text = anchor_link_text
    header.append(anchor)

    serialized = ElementTree.tostring(header)
    return serialized.decode(encoding="utf-8")

136 

137 

def add_prompts(code, first=">>> ", cont="... "):
    """Prefix a code snippet with interpreter-style prompts.

    Parameters
    ----------
    code : str
        Code snippet; lines are separated by newlines.
    first : str, optional
        Prompt placed before the first line.
    cont : str, optional
        Prompt placed before every following line.

    Returns
    -------
    str
        The snippet with a prompt in front of each line.
    """
    # str.split always yields at least one item, so the unpacking is safe.
    head, *rest = code.split("\n")
    prompted = [first + head]
    prompted.extend(cont + line for line in rest)
    return "\n".join(prompted)

146 

147 

def strip_dollars(text):
    """
    Strip dollar symbols from both ends of text.

    Only leading and trailing ``$`` characters are removed; dollar signs in
    the interior of the text are left in place.

    Parameters
    ----------
    text : str
        Text to strip the surrounding dollars from.

    Returns
    -------
    str
        The text without leading or trailing ``$`` characters.
    """
    without_leading = text.lstrip("$")
    return without_leading.rstrip("$")

159 

160 

# Matches an HTML ``src=`` or ``href=`` attribute (optionally quoted) whose
# value starts with ``files/`` or ``/files/``. Group 1 is the attribute name,
# group 2 the optional opening quote.
files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')
# Matches a markdown link or image (optional leading ``!``) whose target
# starts with ``files/`` or ``/files/``. Groups: the optional ``!``, the
# caption, and the location after the ``files/`` prefix.
markdown_url_pattern = re.compile(r"(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)")

163 

164 

def strip_files_prefix(text):
    """
    Drop the fake ``files/`` prefix from URLs and markdown paths in text.

    Two rewrites are applied in order: first HTML ``src``/``href``
    attributes, then markdown links and images.

    Parameters
    ----------
    text : str
        Text in which to replace 'src="files/real...' with 'src="real...'

    Returns
    -------
    str
        The text with the ``files/`` prefixes removed.
    """
    rewrites = (
        (files_url_pattern, r"\1=\2"),
        (markdown_url_pattern, r"\1[\2](\3)"),
    )
    for pattern, replacement in rewrites:
        text = pattern.sub(replacement, text)
    return text

178 

179 

def comment_lines(text, prefix="# "):
    """
    Comment out text by putting a prefix in front of every line.

    Parameters
    ----------
    text : str
        Text to comment out.
    prefix : str
        String placed at the start of each line.

    Returns
    -------
    str
        The text with ``prefix`` in front of each newline-separated line,
        including the first one.
    """
    commented = [prefix + line for line in text.split("\n")]
    return "\n".join(commented)

196 

197 

def get_lines(text, start=None, end=None):
    """
    Return a range of lines from the input text.

    The text is split on newlines, sliced as ``lines[start:end]`` and
    joined back together.

    Parameters
    ----------
    text : str
        Text to parse lines from.
    start : int, optional
        Index of the first line to keep (slice start).
    end : int, optional
        Index at which to stop (slice end, not included).

    Returns
    -------
    str
        The selected lines joined with newlines.
    """
    wanted = slice(start, end)
    selected = text.split("\n")[wanted]
    return "\n".join(selected)

218 

219 

def ipython2python(code):
    """Transform IPython syntax to pure Python syntax

    Parameters
    ----------
    code : str
        IPython code, to be transformed to pure Python

    Returns
    -------
    str
        The result of ``TransformerManager().transform_cell(code)``, or
        ``code`` itself (after emitting a warning) when IPython cannot be
        imported.
    """
    try:
        from IPython.core.inputtransformer2 import TransformerManager
    except ImportError:
        # IPython is optional here: warn and hand the code back untouched.
        warnings.warn(
            "IPython is needed to transform IPython syntax to pure Python."
            " Install ipython if you need this functionality.",
            stacklevel=2,
        )
        return code

    transformer = TransformerManager()
    return transformer.transform_cell(code)

240 

241 

def posix_path(path):
    """Turn a path into posix-style path/to/etc

    Mainly for use in latex on Windows,
    where native Windows paths are not allowed.

    Returns the path with every ``os.path.sep`` replaced by ``/``; when the
    separator is already ``/`` the path is returned as-is.
    """
    native_sep = os.path.sep
    if native_sep == "/":
        return path
    return path.replace(native_sep, "/")

251 

252 

def path2url(path):
    """Turn a file path into a URL.

    The path is split on ``os.path.sep``, each component is percent-quoted
    with ``urllib.parse.quote``, and the components are joined with ``/``.
    """
    components = path.split(os.path.sep)
    return "/".join(map(quote, components))

257 

258 

def ascii_only(s):
    """Ensure a string is ascii.

    Characters that cannot be encoded as ASCII are substituted by the
    ``"replace"`` error handler (a ``?`` per character).
    """
    ascii_bytes = s.encode("ascii", "replace")
    return ascii_bytes.decode("ascii")

262 

263 

def prevent_list_blocks(s):
    """
    Prevent presence of enumerate or itemize blocks in latex headings cells

    A list marker at the very start of the string (after optional
    whitespace) is escaped with a backslash: ``.`` optionally preceded by
    digits, ``-``, ``+`` and ``*``. The patterns are anchored with ``^`` and
    no multiline flag, so only the start of the string is considered.
    """
    escapes = (
        (r"(^\s*\d*)\.", r"\1\."),
        (r"(^\s*)\-", r"\1\-"),
        (r"(^\s*)\+", r"\1\+"),
        (r"(^\s*)\*", r"\1\*"),
    )
    escaped = s
    # Apply the substitutions one after another, in this order.
    for pattern, replacement in escapes:
        escaped = re.sub(pattern, replacement, escaped)
    return escaped

273 

274 

def strip_trailing_newline(text):
    """
    Remove a single newline from the end of text, if there is one.

    Only one ``"\\n"`` is dropped; text that does not end with a newline is
    returned unchanged.
    """
    return text[:-1] if text.endswith("\n") else text

282 

283 

def text_base64(text):
    """
    Encode text as base64.

    The string is encoded to bytes with ``str.encode()``, base64-encoded,
    and the result is decoded back to a ``str``.
    """
    raw_bytes = text.encode()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode()

288 return base64.b64encode(text.encode()).decode()