Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/preprocessors/sanitize.py: 36%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

69 statements  

1""" 

2NBConvert Preprocessor for sanitizing HTML rendering of notebooks. 

3""" 

4 

5import warnings 

6 

7from bleach import ALLOWED_ATTRIBUTES, ALLOWED_TAGS, clean 

8from traitlets import Any, Bool, List, Set, Unicode 

9 

10from .base import Preprocessor 

11 

# Feature flags recording which bleach CSS-sanitizing API could be imported.
# At most one of them ends up True; both stay False when neither is available.
_USE_BLEACH_CSS_SANITIZER = False
_USE_BLEACH_STYLES = False


try:
    # bleach[css] >=5.0
    from bleach.css_sanitizer import ALLOWED_CSS_PROPERTIES as ALLOWED_STYLES
    from bleach.css_sanitizer import CSSSanitizer

    _USE_BLEACH_CSS_SANITIZER = True
    _USE_BLEACH_STYLES = False
except ImportError:
    try:
        # bleach <5
        from bleach import ALLOWED_STYLES  # type:ignore[attr-defined, no-redef]

        _USE_BLEACH_CSS_SANITIZER = False
        _USE_BLEACH_STYLES = True
        warnings.warn(
            "Support for bleach <5 will be removed in a future version of nbconvert",
            DeprecationWarning,
            stacklevel=2,
        )

    except ImportError:
        # Neither API is importable. ALLOWED_STYLES must still be bound because
        # it is used as a trait default when the preprocessor class body is
        # evaluated; leaving it undefined would turn this warning into a
        # NameError at import time. An empty allow-list permits no CSS.
        ALLOWED_STYLES = []  # type:ignore[assignment, misc]
        warnings.warn(
            "The installed bleach/tinycss2 do not provide CSS sanitization, "
            "please upgrade to bleach >=5",
            UserWarning,
            stacklevel=2,
        )

43 

44 

45__all__ = ["SanitizeHTML"] 

46 

47 

class SanitizeHTML(Preprocessor):
    """A preprocessor to sanitize html.

    Raw and markdown cell sources, and the HTML/markdown entries of code cell
    outputs, are passed through ``bleach.clean`` using the configurable
    allow-lists below. Output entries that are neither listed as safe nor as
    sanitizable are removed.
    """

    # Bleach config.
    attributes = Any(
        config=True,
        default_value=ALLOWED_ATTRIBUTES,
        help="Allowed HTML tag attributes",
    )
    tags = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_TAGS,  # type:ignore[arg-type]
        help="List of HTML tags to allow",
    )
    styles = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_STYLES,  # type:ignore[arg-type]
        help="Allowed CSS styles if <style> tag is allowed",
    )
    strip = Bool(
        config=True,
        default_value=False,
        help="If True, remove unsafe markup entirely instead of escaping",
    )
    strip_comments = Bool(
        config=True,
        default_value=True,
        help="If True, strip comments from escaped HTML",
    )

    # Display data config.
    safe_output_keys = Set(
        config=True,
        default_value={
            "metadata",  # Not a mimetype per-se, but expected and safe.
            "text/plain",
            "text/latex",
            "application/json",
            "image/png",
            "image/jpeg",
        },
        help="Cell output mimetypes to render without modification",
    )
    sanitized_output_types = Set(
        config=True,
        default_value={
            "text/html",
            "text/markdown",
        },
        help="Cell output types to display after escaping with Bleach.",
    )

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Sanitize potentially-dangerous contents of the cell.

        Cell Types:
          raw:
            Sanitize literal HTML
          markdown:
            Sanitize literal HTML
          code:
            Sanitize outputs that could result in code execution

        Cells of any other type are returned unchanged.

        Returns
        -------
        tuple
            ``(cell, resources)``; the cell is modified in place.
        """
        if cell.cell_type in ("raw", "markdown"):
            # Sanitize all raw cells anyway.
            # Only ones with the text/html mimetype should be emitted
            # but erring on the side of safety maybe.
            cell.source = self.sanitize_html_tags(cell.source)
        elif cell.cell_type == "code":
            cell.outputs = self.sanitize_code_outputs(cell.outputs)
        # Always hand back the (cell, resources) pair: returning None for an
        # unrecognised cell type would break callers that unpack the result.
        return cell, resources

    def sanitize_code_outputs(self, outputs):
        """
        Sanitize code cell outputs.

        For every output other than ``stream`` and ``error``, each key of the
        output's ``data`` is handled as follows:

        * keys in ``safe_output_keys`` are kept as they are;
        * keys in ``sanitized_output_types`` are run through
          `sanitize_html_tags`;
        * every other key (e.g. 'text/javascript') is removed.

        The outputs are modified in place and the same list is returned.
        """
        for output in outputs:
            # Stream and error outputs are passed through untouched.
            if output["output_type"] in ("stream", "error"):
                continue
            data = output.data
            to_remove = []
            for key in data:
                if key in self.safe_output_keys:
                    continue
                if key in self.sanitized_output_types:
                    self.log.info("Sanitizing %s", key)
                    data[key] = self.sanitize_html_tags(data[key])
                else:
                    # Mark key for removal. (Python doesn't allow deletion of
                    # keys from a dict during iteration)
                    to_remove.append(key)
            for key in to_remove:
                self.log.info("Removing %s", key)
                del data[key]
        return outputs

    def sanitize_html_tags(self, html_str):
        """
        Sanitize a string containing raw HTML tags.

        Calls ``bleach.clean`` with the configured ``tags``, ``attributes``,
        ``strip`` and ``strip_comments`` settings. The ``styles`` allow-list is
        passed through whichever CSS API the installed bleach provides, and is
        not passed at all when neither is available.
        """
        kwargs = {
            "tags": self.tags,
            "attributes": self.attributes,
            "strip": self.strip,
            "strip_comments": self.strip_comments,
        }

        if _USE_BLEACH_CSS_SANITIZER:
            # bleach >=5: CSS filtering is delegated to a CSSSanitizer object.
            css_sanitizer = CSSSanitizer(allowed_css_properties=self.styles)
            kwargs.update(css_sanitizer=css_sanitizer)
        elif _USE_BLEACH_STYLES:
            # bleach <5: the allow-list is passed straight to clean().
            kwargs.update(styles=self.styles)

        return clean(html_str, **kwargs)

174 

175 

def _get_default_css_sanitizer():
    """Build a CSS sanitizer limited to the default ``ALLOWED_STYLES``.

    Returns ``None`` when the ``bleach.css_sanitizer`` API is not in use.
    """
    if not _USE_BLEACH_CSS_SANITIZER:
        return None
    return CSSSanitizer(allowed_css_properties=ALLOWED_STYLES)

179 return None