Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/preprocessors/sanitize.py: 36%

1"""

2NBConvert Preprocessor for sanitizing HTML rendering of notebooks.

3"""

5import warnings

7from bleach import ALLOWED_ATTRIBUTES, ALLOWED_TAGS, clean

8from traitlets import Any, Bool, List, Set, Unicode

10from .base import Preprocessor

# Feature flags recording which bleach CSS-sanitization API could be imported.
# At most one of them ends up True; both stay False when neither import below
# succeeds, in which case no CSS-related argument is handed to bleach.
_USE_BLEACH_CSS_SANITIZER = False
_USE_BLEACH_STYLES = False


try:
    # bleach[css] >=5.0
    from bleach.css_sanitizer import ALLOWED_CSS_PROPERTIES as ALLOWED_STYLES
    from bleach.css_sanitizer import CSSSanitizer

    _USE_BLEACH_CSS_SANITIZER = True
    _USE_BLEACH_STYLES = False
except ImportError:
    try:
        # bleach <5
        from bleach import ALLOWED_STYLES  # type:ignore

        _USE_BLEACH_CSS_SANITIZER = False
        _USE_BLEACH_STYLES = True
        warnings.warn(
            "Support for bleach <5 will be removed in a future version of nbconvert",
            DeprecationWarning,
            stacklevel=2,
        )

    except ImportError:
        # Neither API is importable. ALLOWED_STYLES must still be bound because
        # it is used as a default value further down in this module; without
        # this assignment the module would fail with NameError right after
        # emitting the warning. An empty list allows no CSS properties.
        ALLOWED_STYLES = []  # type:ignore
        warnings.warn(
            "The installed bleach/tinycss2 do not provide CSS sanitization, "
            "please upgrade to bleach >=5",
            UserWarning,
            stacklevel=2,
        )

45__all__ = ["SanitizeHTML"]

class SanitizeHTML(Preprocessor):
    """A preprocessor to sanitize html.

    Raw and markdown cell sources, and the code-cell output mimetypes listed
    in ``sanitized_output_types``, are passed through ``bleach.clean``.
    Code-cell output entries whose key is in neither ``safe_output_keys`` nor
    ``sanitized_output_types`` are deleted.
    """

    # Bleach config.
    attributes = Any(
        config=True,
        default_value=ALLOWED_ATTRIBUTES,
        help="Allowed HTML tag attributes",
    )
    tags = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_TAGS,
        help="List of HTML tags to allow",
    )
    styles = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_STYLES,
        help="Allowed CSS styles if <style> tag is allowed",
    )
    strip = Bool(
        config=True,
        default_value=False,
        help="If True, remove unsafe markup entirely instead of escaping",
    )
    strip_comments = Bool(
        config=True,
        default_value=True,
        help="If True, strip comments from escaped HTML",
    )

    # Display data config.
    safe_output_keys = Set(
        config=True,
        default_value={
            "metadata",  # Not a mimetype per-se, but expected and safe.
            "text/plain",
            "text/latex",
            "application/json",
            "image/png",
            "image/jpeg",
        },
        help="Cell output mimetypes to render without modification",
    )
    sanitized_output_types = Set(
        config=True,
        default_value={
            "text/html",
            "text/markdown",
        },
        help="Cell output types to display after escaping with Bleach.",
    )

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Sanitize potentially-dangerous contents of the cell.

        Cell Types:
          raw:
            Sanitize literal HTML
          markdown:
            Sanitize literal HTML
          code:
            Sanitize outputs that could result in code execution

        Cells of any other type are returned unchanged (a warning is logged).

        Parameters
        ----------
        cell
            The notebook cell to sanitize; it is modified in place.
        resources
            Passed through untouched.
        cell_index
            Unused by this preprocessor.

        Returns
        -------
        tuple
            ``(cell, resources)``, for every cell type.
        """
        if cell.cell_type in ("raw", "markdown"):
            # Raw cells are sanitized regardless of their declared mimetype:
            # only text/html ones should be emitted, but err on the side of
            # safety.
            cell.source = self.sanitize_html_tags(cell.source)
        elif cell.cell_type == "code":
            cell.outputs = self.sanitize_code_outputs(cell.outputs)
        else:
            # Always hand back the (cell, resources) pair instead of falling
            # off the end of the function and implicitly returning None.
            self.log.warning(
                "Cell type %r is not handled by SanitizeHTML; leaving it unchanged",
                cell.cell_type,
            )
        return cell, resources

    def sanitize_code_outputs(self, outputs):
        """
        Sanitize code cell outputs.

        Outputs whose ``output_type`` is ``"stream"`` or ``"error"`` are left
        untouched. For every other output, each key of ``output.data`` is
        handled as follows:

        * keys in ``safe_output_keys`` are kept as-is;
        * keys in ``sanitized_output_types`` have their value replaced by the
          result of `sanitize_html_tags`;
        * every other key (for example 'text/javascript') is deleted.

        The outputs are modified in place and the same list is returned.
        """
        for output in outputs:
            # Stream and error outputs are skipped before `output.data` is
            # read; they are passed through without modification.
            if output["output_type"] in ("stream", "error"):
                continue
            data = output.data
            to_remove = []
            for key in data:
                if key in self.safe_output_keys:
                    continue
                if key in self.sanitized_output_types:
                    # Lazy %-args: the message is only formatted when the
                    # record is actually emitted.
                    self.log.info("Sanitizing %s", key)
                    data[key] = self.sanitize_html_tags(data[key])
                else:
                    # Mark key for removal. (Python doesn't allow deletion of
                    # keys from a dict during iteration)
                    to_remove.append(key)
            for key in to_remove:
                self.log.info("Removing %s", key)
                del data[key]
        return outputs

    def sanitize_html_tags(self, html_str):
        """
        Sanitize a string containing raw HTML tags.

        Calls ``bleach.clean`` with the configured ``tags``, ``attributes``,
        ``strip`` and ``strip_comments`` traits. Depending on which bleach API
        was detected at import time, the configured ``styles`` are passed
        either wrapped in a ``CSSSanitizer`` (``css_sanitizer=``) or directly
        (``styles=``); when neither API is available no CSS argument is passed.

        Returns whatever ``bleach.clean`` returns for ``html_str``.
        """
        kwargs = {
            "tags": self.tags,
            "attributes": self.attributes,
            "strip": self.strip,
            "strip_comments": self.strip_comments,
        }

        if _USE_BLEACH_CSS_SANITIZER:
            css_sanitizer = CSSSanitizer(allowed_css_properties=self.styles)
            kwargs.update(css_sanitizer=css_sanitizer)
        elif _USE_BLEACH_STYLES:
            kwargs.update(styles=self.styles)

        return clean(html_str, **kwargs)

173

174

175def _get_default_css_sanitizer():

176 if _USE_BLEACH_CSS_SANITIZER:

177 return CSSSanitizer(allowed_css_properties=ALLOWED_STYLES)