Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/filters/strings.py: 51%

1"""String filters.

3Contains a collection of useful string manipulation filters for use in Jinja

4templates.

5"""

7# Copyright (c) IPython Development Team.

8# Distributed under the terms of the Modified BSD License.

10import base64

11import os

12import re

13import textwrap

14import warnings

15from urllib.parse import quote

16from xml.etree.ElementTree import Element

18import bleach

20# defusedxml does safe(r) parsing of untrusted XML data

21from defusedxml import ElementTree # type:ignore

23from nbconvert.preprocessors.sanitize import _get_default_css_sanitizer

25__all__ = [

26 "wrap_text",

27 "html2text",

28 "clean_html",

29 "add_anchor",

30 "strip_dollars",

31 "strip_files_prefix",

32 "comment_lines",

33 "get_lines",

34 "ipython2python",

35 "posix_path",

36 "path2url",

37 "add_prompts",

38 "ascii_only",

39 "prevent_list_blocks",

40 "strip_trailing_newline",

41 "text_base64",

42]

44from nbconvert.filters.svg_constants import ALLOWED_SVG_ATTRIBUTES, ALLOWED_SVG_TAGS

def wrap_text(text, width=100):
    """
    Wrap every line of a text block to a maximum width.

    The input is split on newlines and each resulting line is wrapped on its
    own with ``textwrap.wrap``, so the original line breaks are kept and
    words are only broken when they cannot fit otherwise.

    Parameters
    ----------
    text : str
        Text to wrap.
    width : int, optional
        Number of characters to wrap to, default 100.
    """
    return "\n".join(
        "\n".join(textwrap.wrap(line, width)) for line in text.split("\n")
    )

def html2text(element):
    """Return the concatenated inner text of an element.

    Analog of jQuery's $(element).text()

    ``element`` may be an already-parsed element or a string. A string is
    parsed with ``ElementTree.fromstring``; if parsing fails the string is
    returned unchanged.
    """
    if isinstance(element, str):
        try:
            element = ElementTree.fromstring(element)
        except Exception:
            # Not parseable markup: hand the input back untouched.
            return element

    # Own text first, then each child's text (recursively), then the tail.
    pieces = [element.text or ""]
    pieces.extend(html2text(child) for child in element)
    pieces.append(element.tail or "")
    return "".join(pieces)

def clean_html(element):
    """Sanitize an HTML fragment with ``bleach.clean``.

    ``bytes`` input is decoded and anything else is passed through ``str``.
    The allowed tags are bleach's defaults plus the SVG tags and ``div``,
    ``pre``, ``code`` and ``span``. The allowed attributes are bleach's
    defaults, the SVG attributes for every SVG tag, and ``class``/``id``
    under the ``"*"`` key. Comments are not stripped. A CSS sanitizer is
    passed along only when ``_get_default_css_sanitizer`` returns one.
    """
    if isinstance(element, bytes):
        markup = element.decode()
    else:
        markup = str(element)

    extra_options = {}
    sanitizer = _get_default_css_sanitizer()
    if sanitizer:
        extra_options["css_sanitizer"] = sanitizer

    allowed_tags = [*bleach.ALLOWED_TAGS, *ALLOWED_SVG_TAGS, "div", "pre", "code", "span"]

    # Later entries override earlier ones: bleach defaults, then SVG, then "*".
    allowed_attributes = dict(bleach.ALLOWED_ATTRIBUTES)
    for svg_tag in ALLOWED_SVG_TAGS:
        allowed_attributes[svg_tag] = list(ALLOWED_SVG_ATTRIBUTES)
    allowed_attributes["*"] = ["class", "id"]

    return bleach.clean(
        markup,
        tags=allowed_tags,
        strip_comments=False,
        attributes=allowed_attributes,
        **extra_options,
    )

103

104

def _convert_header_id(header_contents):
    """Turn heading text into a value usable as an id. Takes a string, returns a string.

    Note: this may be subject to change in the case of changes to how we wish to generate ids.

    For use on markdown headings.
    """
    # An id must not contain spaces, so they become hyphens first.
    hyphenated = header_contents.replace(" ", "-")
    # The id is also used as a URL fragment, so everything outside the
    # explicitly safe set below is percent-encoded.
    return quote(hyphenated, safe="?/:@!$&'()*+,;=")

116

117

def add_anchor(html, anchor_link_text="¶"):
    """Give an html header an id and append an anchor-link pointing at it.

    For use on markdown headings

    If ``html`` cannot be parsed it is returned unchanged. The id is derived
    from the header's inner text via ``_convert_header_id``.
    ``anchor_link_text`` may itself be markup (e.g. an image); when it does
    not parse it is used as the plain text of the link.
    """
    try:
        header = ElementTree.fromstring(html)
    except Exception:
        # Unparseable input: leave it exactly as received.
        return html

    header_id = _convert_header_id(html2text(header))
    header.set("id", header_id)

    anchor = Element("a", {"class": "anchor-link", "href": f"#{header_id}"})
    try:
        # Markup link text is attached as a child element.
        anchor.append(ElementTree.fromstring(anchor_link_text))
    except Exception:
        # Otherwise treat it as ordinary text.
        anchor.text = anchor_link_text
    header.append(anchor)

    serialized = ElementTree.tostring(header)
    return serialized.decode(encoding="utf-8")

140

141

def add_prompts(code, first=">>> ", cont="... "):
    """Prefix a code snippet with prompts.

    The first line gets ``first``; every following line gets ``cont``.
    """
    opening_line, *following_lines = code.split("\n")
    prompted = [first + opening_line]
    prompted.extend(cont + line for line in following_lines)
    return "\n".join(prompted)

150

151

def strip_dollars(text):
    """
    Strip dollar symbols from both ends of text.

    Only leading and trailing ``$`` characters are removed; dollar symbols
    in the interior of the text are kept.

    Parameters
    ----------
    text : str
        Text to remove surrounding dollars from
    """
    without_leading = text.lstrip("$")
    return without_leading.rstrip("$")

163

164

# HTML attribute form: ``src=`` or ``href=``, an optional opening quote, an
# optional leading slash, then ``files/``. Group 1 is the attribute name and
# group 2 the quote, so both can be written back by the substitution.
files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')
# Markdown link/image form: ``[caption](files/location)`` with an optional
# leading ``!`` (group 1) and an optional slash before ``files/``. The
# parentheses around the target are literal and therefore escaped.
markdown_url_pattern = re.compile(r"(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)")


def strip_files_prefix(text):
    """
    Fix all fake URLs that start with ``files/``, stripping out the ``files/`` prefix.
    Applies to both urls (for html) and relative paths (for markdown paths).

    Parameters
    ----------
    text : str
        Text in which to replace 'src="files/real...' with 'src="real...'

    Returns
    -------
    str
        The text with the ``files/`` prefix removed from matching HTML
        ``src``/``href`` attributes and markdown link targets.
    """
    # HTML attributes first: keep the attribute name and quote, drop the prefix.
    cleaned_text = files_url_pattern.sub(r"\1=\2", text)
    # Then markdown links/images: rebuild ``[caption](location)``.
    return markdown_url_pattern.sub(r"\1[\2](\3)", cleaned_text)

182

183

def comment_lines(text, prefix="# "):
    """
    Comment out text by prefixing each of its lines.

    Parameters
    ----------
    text : str
        Text to comment out.
    prefix : str
        String placed at the start of each line.
    """
    # Every newline-separated line, including the first and any empty ones,
    # receives the prefix.
    return "\n".join(prefix + line for line in text.split("\n"))

200

201

def get_lines(text, start=None, end=None):
    """
    Return a range of lines from the input text.

    The text is split on newlines, sliced as ``lines[start:end]`` and
    joined back together with newlines.

    Parameters
    ----------
    text : str
        Text to parse lines from.
    start : int, optional
        Index of the first line to keep (slice start).
    end : int, optional
        Index at which to stop (slice end, not included).
    """
    wanted = slice(start, end)
    selected = text.split("\n")[wanted]
    return "\n".join(selected)

222

223

def ipython2python(code):
    """Convert IPython syntax into pure Python syntax.

    When IPython cannot be imported a warning is issued and the code is
    returned unchanged.

    Parameters
    ----------
    code : str
        IPython code, to be transformed to pure Python
    """
    try:
        from IPython.core.inputtransformer2 import TransformerManager
    except ImportError:
        warnings.warn(
            "IPython is needed to transform IPython syntax to pure Python."
            " Install ipython if you need this functionality.",
            stacklevel=2,
        )
        return code

    transformer = TransformerManager()
    return transformer.transform_cell(code)

244

245

def posix_path(path):
    """Convert a path to posix style: path/to/etc

    Mainly for use in latex on Windows,
    where native Windows paths are not allowed.
    """
    native_sep = os.path.sep
    # Nothing to do when the platform separator is already a forward slash.
    return path if native_sep == "/" else path.replace(native_sep, "/")

255

256

def path2url(path):
    """Convert a file path to a URL.

    The path is split on the platform separator, each part is
    percent-quoted, and the parts are joined with ``/``.
    """
    quoted_parts = [quote(segment) for segment in path.split(os.path.sep)]
    return "/".join(quoted_parts)

261

262

def ascii_only(s):
    """Return the string with every non-ascii character replaced by ``?``."""
    # The "replace" error handler substitutes "?" for unencodable characters.
    ascii_bytes = s.encode("ascii", "replace")
    return ascii_bytes.decode("ascii")

266

267

def prevent_list_blocks(s):
    """
    Prevent presence of enumerate or itemize blocks in latex headings cells

    A list marker at the very start of the string (after optional
    whitespace) gets a backslash inserted in front of it. The markers are
    ``.`` after optional digits, ``-``, ``+`` and ``*``.
    """
    # Applied in this order, each on the result of the previous one.
    substitutions = (
        (r"(^\s*\d*)\.", r"\1\."),
        (r"(^\s*)\-", r"\1\-"),
        (r"(^\s*)\+", r"\1\+"),
        (r"(^\s*)\*", r"\1\*"),
    )
    out = s
    for pattern, replacement in substitutions:
        out = re.sub(pattern, replacement, out)
    return out

277

278

def strip_trailing_newline(text):
    """
    Remove a single newline from the end of text, if there is one.
    """
    return text[:-1] if text.endswith("\n") else text

286

287

def text_base64(text):
    """
    Encode text as base64 and return the result as a string.
    """
    raw_bytes = text.encode()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()