Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/preprocessors/sanitize.py: 36%
66 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2NBConvert Preprocessor for sanitizing HTML rendering of notebooks.
3"""
5import warnings
7from bleach import ALLOWED_ATTRIBUTES, ALLOWED_TAGS, clean
8from traitlets import Any, Bool, List, Set, Unicode
10from .base import Preprocessor
# Feature flags describing which CSS-sanitization API the installed bleach
# exposes. At most one of them ends up True:
#   _USE_BLEACH_CSS_SANITIZER -> bleach >=5 with the CSS extra (CSSSanitizer)
#   _USE_BLEACH_STYLES        -> bleach <5 (``styles=`` keyword of ``clean``)
_USE_BLEACH_CSS_SANITIZER = False
_USE_BLEACH_STYLES = False

try:
    # bleach[css] >=5.0
    from bleach.css_sanitizer import ALLOWED_CSS_PROPERTIES as ALLOWED_STYLES
    from bleach.css_sanitizer import CSSSanitizer
except ImportError:
    try:
        # bleach <5
        from bleach import ALLOWED_STYLES  # type:ignore
    except ImportError:
        # Neither API is importable. Bind an empty default so that later
        # references to ALLOWED_STYLES (e.g. as a trait default) do not raise
        # NameError at import time; both flags stay False, so the value is
        # never handed to bleach.
        ALLOWED_STYLES = []  # type:ignore
        warnings.warn(
            "The installed bleach/tinycss2 do not provide CSS sanitization, "
            "please upgrade to bleach >=5",
            UserWarning,
            stacklevel=2,
        )
    else:
        _USE_BLEACH_STYLES = True
        warnings.warn(
            "Support for bleach <5 will be removed in a future version of nbconvert",
            DeprecationWarning,
            stacklevel=2,
        )
else:
    _USE_BLEACH_CSS_SANITIZER = True
45__all__ = ["SanitizeHTML"]
class SanitizeHTML(Preprocessor):
    """A preprocessor to sanitize html.

    Raw and markdown cell sources, and the configured rich-output mimetypes
    of code cells, are passed through ``bleach.clean`` using the traits
    below. Output mimetypes that are neither whitelisted as safe nor
    configured for sanitizing are removed from the output bundle.
    """

    # Bleach config.
    attributes = Any(
        config=True,
        default_value=ALLOWED_ATTRIBUTES,
        help="Allowed HTML tag attributes",
    )
    tags = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_TAGS,
        help="List of HTML tags to allow",
    )
    styles = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_STYLES,
        help="Allowed CSS styles if <style> tag is allowed",
    )
    strip = Bool(
        config=True,
        default_value=False,
        help="If True, remove unsafe markup entirely instead of escaping",
    )
    strip_comments = Bool(
        config=True,
        default_value=True,
        help="If True, strip comments from escaped HTML",
    )

    # Display data config.
    safe_output_keys = Set(
        config=True,
        default_value={
            "metadata",  # Not a mimetype per-se, but expected and safe.
            "text/plain",
            "text/latex",
            "application/json",
            "image/png",
            "image/jpeg",
        },
        help="Cell output mimetypes to render without modification",
    )
    sanitized_output_types = Set(
        config=True,
        default_value={
            "text/html",
            "text/markdown",
        },
        help="Cell output types to display after escaping with Bleach.",
    )

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Sanitize potentially-dangerous contents of the cell.

        Cell Types:
          raw:
            Sanitize literal HTML
          markdown:
            Sanitize literal HTML
          code:
            Sanitize outputs that could result in code execution

        Cells of any other type are returned unchanged.

        Returns the ``(cell, resources)`` pair; ``cell`` is modified in place.
        """
        cell_type = cell.cell_type
        if cell_type in ("raw", "markdown"):
            # Raw cells are sanitized unconditionally: only ones with the
            # text/html mimetype should be emitted, but this errs on the
            # side of safety.
            cell.source = self.sanitize_html_tags(cell.source)
        elif cell_type == "code":
            cell.outputs = self.sanitize_code_outputs(cell.outputs)
        # Return the pair for every cell type, so an unrecognised type does
        # not fall through and yield None to a caller that unpacks the result.
        return cell, resources

    def sanitize_code_outputs(self, outputs):
        """
        Sanitize code cell outputs.

        For every output that is not a stream or error output, each key of
        its ``data`` bundle is handled as follows:

        * keys in ``safe_output_keys`` are left untouched;
        * keys in ``sanitized_output_types`` are run through
          `sanitize_html_tags`;
        * all other keys (e.g. 'text/javascript') are deleted.

        The outputs are modified in place and the same list is returned.
        """
        for output in outputs:
            # Stream and error outputs are passed through without inspection.
            if output["output_type"] in ("stream", "error"):
                continue
            data = output.data
            to_remove = []
            for key in data:
                if key in self.safe_output_keys:
                    continue
                if key in self.sanitized_output_types:
                    self.log.info("Sanitizing %s", key)
                    data[key] = self.sanitize_html_tags(data[key])
                else:
                    # Mark key for removal. (Python doesn't allow deletion of
                    # keys from a dict during iteration)
                    to_remove.append(key)
            for key in to_remove:
                self.log.info("Removing %s", key)
                del data[key]
        return outputs

    def sanitize_html_tags(self, html_str):
        """
        Sanitize a string containing raw HTML tags.

        Calls ``bleach.clean`` with the configured tags, attributes, strip
        and strip_comments settings. The configured ``styles`` are passed
        either wrapped in a ``CSSSanitizer`` or through the ``styles``
        keyword, depending on which bleach API was detected at import time;
        when neither was detected no CSS configuration is passed.
        """
        kwargs = {
            "tags": self.tags,
            "attributes": self.attributes,
            "strip": self.strip,
            "strip_comments": self.strip_comments,
        }

        if _USE_BLEACH_CSS_SANITIZER:
            css_sanitizer = CSSSanitizer(allowed_css_properties=self.styles)
            kwargs.update(css_sanitizer=css_sanitizer)
        elif _USE_BLEACH_STYLES:
            kwargs.update(styles=self.styles)

        return clean(html_str, **kwargs)
def _get_default_css_sanitizer():
    """Build a CSS sanitizer restricted to the default allowed styles.

    Returns a ``CSSSanitizer`` configured with ``ALLOWED_STYLES`` when
    ``_USE_BLEACH_CSS_SANITIZER`` is set, and ``None`` otherwise.
    """
    if not _USE_BLEACH_CSS_SANITIZER:
        return None
    return CSSSanitizer(allowed_css_properties=ALLOWED_STYLES)