Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/preprocessors/sanitize.py: 36%

1"""

2NBConvert Preprocessor for sanitizing HTML rendering of notebooks.

3"""

5import warnings

7from bleach import ALLOWED_ATTRIBUTES, ALLOWED_TAGS, clean

8from traitlets import Any, Bool, List, Set, Unicode

10from .base import Preprocessor

# Capability flags describing which bleach CSS-sanitization API is usable.
# Exactly one of them is True when CSS sanitization is available; both stay
# False when neither API could be imported.
_USE_BLEACH_CSS_SANITIZER = False
_USE_BLEACH_STYLES = False


try:
    # bleach[css] >=5.0
    from bleach.css_sanitizer import ALLOWED_CSS_PROPERTIES as ALLOWED_STYLES
    from bleach.css_sanitizer import CSSSanitizer

    _USE_BLEACH_CSS_SANITIZER = True
    _USE_BLEACH_STYLES = False
except ImportError:
    try:
        # bleach <5
        from bleach import ALLOWED_STYLES  # type:ignore[attr-defined, no-redef]

        _USE_BLEACH_CSS_SANITIZER = False
        _USE_BLEACH_STYLES = True
        warnings.warn(
            "Support for bleach <5 will be removed in a future version of nbconvert",
            DeprecationWarning,
            stacklevel=2,
        )

    except ImportError:
        # Neither CSS API could be imported. Bind an empty allow-list so that
        # later module-level references to ALLOWED_STYLES do not fail with a
        # NameError; both capability flags remain False in this case.
        ALLOWED_STYLES = frozenset()
        warnings.warn(
            "The installed bleach/tinycss2 do not provide CSS sanitization, "
            "please upgrade to bleach >=5",
            UserWarning,
            stacklevel=2,
        )

__all__ = ["SanitizeHTML"]


class SanitizeHTML(Preprocessor):
    """A preprocessor to sanitize html."""

    # Bleach config.
    attributes = Any(
        config=True,
        default_value=ALLOWED_ATTRIBUTES,
        help="Allowed HTML tag attributes",
    )
    tags = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_TAGS,  # type:ignore[arg-type]
        help="List of HTML tags to allow",
    )
    styles = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_STYLES,  # type:ignore[arg-type]
        help="Allowed CSS styles if <style> tag is allowed",
    )
    strip = Bool(
        config=True,
        default_value=False,
        help="If True, remove unsafe markup entirely instead of escaping",
    )
    strip_comments = Bool(
        config=True,
        default_value=True,
        help="If True, strip comments from escaped HTML",
    )

    # Display data config.
    safe_output_keys = Set(
        config=True,
        default_value={
            "metadata",  # Not a mimetype per-se, but expected and safe.
            "text/plain",
            "text/latex",
            "application/json",
            "image/png",
            "image/jpeg",
        },
        help="Cell output mimetypes to render without modification",
    )
    sanitized_output_types = Set(
        config=True,
        default_value={
            "text/html",
            "text/markdown",
        },
        help="Cell output types to display after escaping with Bleach.",
    )

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Sanitize potentially-dangerous contents of the cell.

        Cell Types:
          raw:
            Sanitize literal HTML
          markdown:
            Sanitize literal HTML
          code:
            Sanitize outputs that could result in code execution

        Cells of any other type are passed through unmodified.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed; it is modified in place.
        resources : dictionary
            Additional resources, returned untouched.
        cell_index : int
            Index of the cell being processed (unused here).

        Returns
        -------
        tuple
            The ``(cell, resources)`` pair.
        """
        if cell.cell_type in ("raw", "markdown"):
            # Sanitize all raw cells anyway.
            # Only ones with the text/html mimetype should be emitted
            # but erring on the side of safety maybe.
            cell.source = self.sanitize_html_tags(cell.source)
        elif cell.cell_type == "code":
            cell.outputs = self.sanitize_code_outputs(cell.outputs)
        # Always hand back a pair: callers unpack the result, so returning
        # None for an unrecognised cell type would raise a TypeError.
        return cell, resources

    def sanitize_code_outputs(self, outputs):
        """
        Sanitize code cell outputs.

        Outputs of type 'stream' and 'error' are left untouched. For every
        other output, each key of its ``data`` mapping is handled as follows:

        * keys in `safe_output_keys` are kept as-is,
        * keys in `sanitized_output_types` (e.g. 'text/html') are passed
          through `sanitize_html_tags`,
        * every other key (e.g. 'text/javascript') is removed.

        The outputs are modified in place and the same list is returned.
        """
        for output in outputs:
            # Stream and error outputs are skipped without inspection.
            if output["output_type"] in ("stream", "error"):
                continue
            data = output.data
            to_remove = []
            for key in data:
                if key in self.safe_output_keys:
                    continue
                if key in self.sanitized_output_types:
                    self.log.info("Sanitizing %s", key)
                    data[key] = self.sanitize_html_tags(data[key])
                else:
                    # Mark key for removal. (Python doesn't allow deletion of
                    # keys from a dict during iteration)
                    to_remove.append(key)
            for key in to_remove:
                self.log.info("Removing %s", key)
                del data[key]
        return outputs

    def sanitize_html_tags(self, html_str):
        """
        Sanitize a string containing raw HTML tags.

        The string is passed to bleach's ``clean`` together with the
        configured `tags`, `attributes`, `strip` and `strip_comments`
        values. The configured `styles` are forwarded through whichever
        CSS API the module-level capability flags report as available;
        when neither is available no CSS option is passed.
        """
        kwargs = {
            "tags": self.tags,
            "attributes": self.attributes,
            "strip": self.strip,
            "strip_comments": self.strip_comments,
        }

        if _USE_BLEACH_CSS_SANITIZER:
            css_sanitizer = CSSSanitizer(allowed_css_properties=self.styles)
            kwargs.update(css_sanitizer=css_sanitizer)
        elif _USE_BLEACH_STYLES:
            kwargs.update(styles=self.styles)

        return clean(html_str, **kwargs)

174

175

def _get_default_css_sanitizer():
    """Return a CSSSanitizer built from ALLOWED_STYLES, or None.

    None is returned when the module-level flag reports that the
    ``bleach.css_sanitizer`` API is not in use.
    """
    if not _USE_BLEACH_CSS_SANITIZER:
        return None
    return CSSSanitizer(allowed_css_properties=ALLOWED_STYLES)