Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/filters/strings.py: 51%
100 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""String filters.
3Contains a collection of useful string manipulation filters for use in Jinja
4templates.
5"""
7# Copyright (c) IPython Development Team.
8# Distributed under the terms of the Modified BSD License.
10import base64
11import os
12import re
13import textwrap
14import warnings
15from urllib.parse import quote
16from xml.etree.ElementTree import Element
18import bleach
20# defusedxml does safe(r) parsing of untrusted XML data
21from defusedxml import ElementTree # type:ignore
23from nbconvert.preprocessors.sanitize import _get_default_css_sanitizer
# Public API of this module: the filter names exposed to Jinja templates and
# exported by ``from nbconvert.filters.strings import *``.
__all__ = [
    "wrap_text",
    "html2text",
    "clean_html",
    "add_anchor",
    "strip_dollars",
    "strip_files_prefix",
    "comment_lines",
    "get_lines",
    "ipython2python",
    "posix_path",
    "path2url",
    "add_prompts",
    "ascii_only",
    "prevent_list_blocks",
    "strip_trailing_newline",
    "text_base64",
]
44from nbconvert.filters.svg_constants import ALLOWED_SVG_ATTRIBUTES, ALLOWED_SVG_TAGS
def wrap_text(text, width=100):
    """
    Intelligently wrap text.
    Wrap text without breaking words if possible.

    Parameters
    ----------
    text : str
        Text to wrap.
    width : int, optional
        Number of characters to wrap to, default 100.
    """
    # Wrap each original line independently so existing hard line breaks
    # are preserved, then stitch everything back together.
    rewrapped = ["\n".join(textwrap.wrap(line, width)) for line in text.split("\n")]
    return "\n".join(rewrapped)
def html2text(element):
    """extract inner text from html

    Analog of jQuery's $(element).text()
    """
    if isinstance(element, str):
        try:
            element = ElementTree.fromstring(element)
        except Exception:
            # failed to parse, just return it unmodified
            return element
    # Collect this element's text, each child's full text (recursively),
    # and finally this element's tail, then join in document order.
    pieces = [element.text or ""]
    pieces.extend(html2text(child) for child in element)
    pieces.append(element.tail or "")
    return "".join(pieces)
def clean_html(element):
    """Clean an html element."""
    # Accept bytes or arbitrary objects; bleach wants a str.
    markup = element.decode() if isinstance(element, bytes) else str(element)
    extra_kwargs = {}
    css_sanitizer = _get_default_css_sanitizer()
    if css_sanitizer:
        extra_kwargs["css_sanitizer"] = css_sanitizer
    # Whitelist: bleach defaults, plus SVG tags, plus a few block/inline tags
    # nbconvert output relies on.
    allowed_tags = [*bleach.ALLOWED_TAGS, *ALLOWED_SVG_TAGS, "div", "pre", "code", "span"]
    allowed_attributes = dict(bleach.ALLOWED_ATTRIBUTES)
    for svg_tag in ALLOWED_SVG_TAGS:
        allowed_attributes[svg_tag] = list(ALLOWED_SVG_ATTRIBUTES)
    allowed_attributes["*"] = ["class", "id"]
    return bleach.clean(
        markup,
        tags=allowed_tags,
        strip_comments=False,
        attributes=allowed_attributes,
        **extra_kwargs,
    )
105def _convert_header_id(header_contents):
106 """Convert header contents to valid id value. Takes string as input, returns string.
108 Note: this may be subject to change in the case of changes to how we wish to generate ids.
110 For use on markdown headings.
111 """
112 # Valid IDs need to be non-empty and contain no space characters, but are otherwise arbitrary.
113 # However, these IDs are also used in URL fragments, which are more restrictive, so we URL
114 # encode any characters that are not valid in URL fragments.
115 return quote(header_contents.replace(" ", "-"), safe="?/:@!$&'()*+,;=")
def add_anchor(html, anchor_link_text="¶"):
    """Add an id and an anchor-link to an html header

    For use on markdown headings
    """
    try:
        heading = ElementTree.fromstring(html)
    except Exception:
        # failed to parse, just return it unmodified
        return html
    fragment = _convert_header_id(html2text(heading))
    heading.set("id", fragment)
    anchor = Element("a", {"class": "anchor-link", "href": "#" + fragment})
    try:
        # Test if the anchor link text is HTML (e.g. an image)
        anchor.append(ElementTree.fromstring(anchor_link_text))
    except Exception:
        # If we fail to parse, assume we've just got regular text
        anchor.text = anchor_link_text
    heading.append(anchor)
    return ElementTree.tostring(heading).decode(encoding="utf-8")
def add_prompts(code, first=">>> ", cont="... "):
    """Add prompts to code snippets"""
    # First line gets the primary prompt, every following line the
    # continuation prompt.
    lines = code.split("\n")
    prompted = [first + lines[0]]
    prompted.extend(cont + line for line in lines[1:])
    return "\n".join(prompted)
def strip_dollars(text):
    """
    Remove dollar symbols from the start and end of text
    (e.g. math delimiters around a latex expression).

    Parameters
    ----------
    text : str
        Text to remove dollars from
    """
    # Only leading/trailing dollars are removed; interior ones are kept.
    return text.lstrip("$").rstrip("$")
# src/href HTML attributes whose value starts with (an optional "/" and) "files/"
files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')
# markdown links/images whose target starts with (an optional "/" and) "files/"
markdown_url_pattern = re.compile(r"(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)")


def strip_files_prefix(text):
    """
    Fix all fake URLs that start with ``files/``, stripping out the ``files/`` prefix.
    Applies to both urls (for html) and relative paths (for markdown paths).

    Parameters
    ----------
    text : str
        Text in which to replace 'src="files/real...' with 'src="real...'
    """
    # HTML attributes first, then markdown links on the intermediate result.
    without_html_prefix = files_url_pattern.sub(r"\1=\2", text)
    return markdown_url_pattern.sub(r"\1[\2](\3)", without_html_prefix)
def comment_lines(text, prefix="# "):
    """
    Build a Python comment line from input text.

    Parameters
    ----------
    text : str
        Text to comment out.
    prefix : str
        String prepended to the start of each line.
    """
    # Prefix every line, including the first one.
    return "\n".join(prefix + line for line in text.split("\n"))
def get_lines(text, start=None, end=None):
    """
    Split the input text into separate lines and then return the
    lines that the caller is interested in.

    Parameters
    ----------
    text : str
        Text to parse lines from.
    start : int, optional
        First line to grab from (0-based, inclusive).
    end : int, optional
        Line to stop at (exclusive, like a slice bound).
    """
    # Standard slice semantics: None bounds mean "from the beginning" /
    # "to the end" respectively.
    selected = text.split("\n")[start:end]
    return "\n".join(selected)
224def ipython2python(code):
225 """Transform IPython syntax to pure Python syntax
227 Parameters
228 ----------
229 code : str
230 IPython code, to be transformed to pure Python
231 """
232 try:
233 from IPython.core.inputtransformer2 import TransformerManager
234 except ImportError:
235 warnings.warn(
236 "IPython is needed to transform IPython syntax to pure Python."
237 " Install ipython if you need this functionality.",
238 stacklevel=2,
239 )
240 return code
241 else:
242 isp = TransformerManager()
243 return isp.transform_cell(code)
def posix_path(path):
    """Turn a path into posix-style path/to/etc

    Mainly for use in latex on Windows,
    where native Windows paths are not allowed.
    """
    sep = os.path.sep
    # On posix systems the separator already is "/" and the path is
    # returned unchanged.
    return path if sep == "/" else path.replace(sep, "/")
def path2url(path):
    """Turn a file path into a URL"""
    # Percent-encode each native path component and rejoin with "/".
    quoted_parts = map(quote, path.split(os.path.sep))
    return "/".join(quoted_parts)
def ascii_only(s):
    """ensure a string is ascii"""
    # Non-ascii characters are replaced (with "?") rather than dropped.
    ascii_bytes = s.encode("ascii", "replace")
    return ascii_bytes.decode("ascii")
def prevent_list_blocks(s):
    """
    Prevent presence of enumerate or itemize blocks in latex headings cells
    """
    # Escape each leading list marker (digit-dot, -, +, *) with a literal
    # backslash so latex does not start an enumerate/itemize environment.
    out = s
    for pattern, replacement in (
        (r"(^\s*\d*)\.", r"\1\."),
        (r"(^\s*)\-", r"\1\-"),
        (r"(^\s*)\+", r"\1\+"),
        (r"(^\s*)\*", r"\1\*"),
    ):
        out = re.sub(pattern, replacement, out)
    return out
def strip_trailing_newline(text):
    """
    Strips a newline from the end of text.
    """
    # Only the single final newline is removed; any earlier ones remain.
    return text[:-1] if text.endswith("\n") else text
def text_base64(text):
    """
    Encode base64 text
    """
    # utf-8 encode first (str.encode default), then base64, then back to str.
    raw = text.encode()
    return base64.b64encode(raw).decode()