Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/preprocessors/sanitize.py: 36%

66 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2NBConvert Preprocessor for sanitizing HTML rendering of notebooks. 

3""" 

4 

5import warnings 

6 

7from bleach import ALLOWED_ATTRIBUTES, ALLOWED_TAGS, clean 

8from traitlets import Any, Bool, List, Set, Unicode 

9 

10from .base import Preprocessor 

11 

# Flags recording which CSS sanitization API the installed bleach offers.
# At most one of them is switched on by the import probing below.
_USE_BLEACH_CSS_SANITIZER = False
_USE_BLEACH_STYLES = False


try:
    # bleach[css] >=5.0
    from bleach.css_sanitizer import ALLOWED_CSS_PROPERTIES as ALLOWED_STYLES
    from bleach.css_sanitizer import CSSSanitizer

    _USE_BLEACH_CSS_SANITIZER = True
except ImportError:
    try:
        # bleach <5
        from bleach import ALLOWED_STYLES  # type:ignore

        _USE_BLEACH_STYLES = True
        warnings.warn(
            "Support for bleach <5 will be removed in a future version of nbconvert",
            DeprecationWarning,
            stacklevel=2,
        )

    except ImportError:
        # Neither API could be imported. ALLOWED_STYLES is still read at
        # module level further down (as a trait default), so bind an empty
        # fallback here; otherwise importing this module would fail with a
        # NameError right after emitting the warning below. Both flags stay
        # False in this branch.
        ALLOWED_STYLES = frozenset()
        warnings.warn(
            "The installed bleach/tinycss2 do not provide CSS sanitization, "
            "please upgrade to bleach >=5",
            UserWarning,
            stacklevel=2,
        )

43 

44 

45__all__ = ["SanitizeHTML"] 

46 

47 

class SanitizeHTML(Preprocessor):
    """A preprocessor to sanitize html.

    Raw and markdown cell sources, and selected entries of code cell
    outputs, are passed through ``bleach.clean`` using the configurable
    traits declared below.
    """

    # Bleach config.
    attributes = Any(
        config=True,
        default_value=ALLOWED_ATTRIBUTES,
        help="Allowed HTML tag attributes",
    )
    tags = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_TAGS,
        help="List of HTML tags to allow",
    )
    styles = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_STYLES,
        help="Allowed CSS styles if <style> tag is allowed",
    )
    strip = Bool(
        config=True,
        default_value=False,
        help="If True, remove unsafe markup entirely instead of escaping",
    )
    strip_comments = Bool(
        config=True,
        default_value=True,
        help="If True, strip comments from escaped HTML",
    )

    # Display data config.
    safe_output_keys = Set(
        config=True,
        default_value={
            "metadata",  # Not a mimetype per-se, but expected and safe.
            "text/plain",
            "text/latex",
            "application/json",
            "image/png",
            "image/jpeg",
        },
        help="Cell output mimetypes to render without modification",
    )
    sanitized_output_types = Set(
        config=True,
        default_value={
            "text/html",
            "text/markdown",
        },
        help="Cell output types to display after escaping with Bleach.",
    )

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Sanitize potentially-dangerous contents of the cell.

        Cell Types:
          raw:
            Sanitize literal HTML
          markdown:
            Sanitize literal HTML
          code:
            Sanitize outputs that could result in code execution

        Cells of any other type are passed through untouched.

        Returns the ``(cell, resources)`` pair for every cell type.
        """
        cell_type = cell.cell_type
        if cell_type in ("raw", "markdown"):
            # Raw cells are sanitized unconditionally: only ones with the
            # text/html mimetype should be emitted, but err on the side of
            # safety.
            cell.source = self.sanitize_html_tags(cell.source)
        elif cell_type == "code":
            cell.outputs = self.sanitize_code_outputs(cell.outputs)
        # Return the pair on every path (including unrecognized cell types)
        # so the result can always be unpacked instead of being None.
        return cell, resources

    def sanitize_code_outputs(self, outputs):
        """
        Sanitize code cell outputs.

        Outputs whose ``output_type`` is 'stream' or 'error' are skipped.
        For every other output, each key of its ``data`` mapping is handled
        as follows:

        - keys listed in ``safe_output_keys`` are kept unchanged;
        - keys listed in ``sanitized_output_types`` (e.g. 'text/html') have
          their value replaced by the result of `sanitize_html_tags`;
        - every remaining key (e.g. 'text/javascript') is deleted.

        The outputs are modified in place and the same ``outputs`` object
        is returned.
        """
        for output in outputs:
            # Skipped without inspection; per the original author's note
            # these are always ascii, so there is nothing to escape.
            if output["output_type"] in ("stream", "error"):
                continue
            data = output.data
            # Iterate over a snapshot of the keys: entries are deleted from
            # ``data`` inside the loop, which is not allowed while iterating
            # the mapping itself.
            for key in list(data):
                if key in self.safe_output_keys:
                    continue
                if key in self.sanitized_output_types:
                    self.log.info("Sanitizing %s", key)
                    data[key] = self.sanitize_html_tags(data[key])
                else:
                    self.log.info("Removing %s", key)
                    del data[key]
        return outputs

    def sanitize_html_tags(self, html_str):
        """
        Sanitize a string containing raw HTML tags.

        Calls ``bleach.clean`` with this preprocessor's ``tags``,
        ``attributes``, ``strip`` and ``strip_comments`` settings and returns
        its result. ``styles`` is forwarded through whichever CSS mechanism
        the module-level flags selected: a ``CSSSanitizer`` instance, the
        ``styles`` keyword, or not at all when neither flag is set.
        """
        kwargs = {
            "tags": self.tags,
            "attributes": self.attributes,
            "strip": self.strip,
            "strip_comments": self.strip_comments,
        }

        if _USE_BLEACH_CSS_SANITIZER:
            css_sanitizer = CSSSanitizer(allowed_css_properties=self.styles)
            kwargs.update(css_sanitizer=css_sanitizer)
        elif _USE_BLEACH_STYLES:
            kwargs.update(styles=self.styles)

        return clean(html_str, **kwargs)

173 

174 

def _get_default_css_sanitizer():
    """Build a ``CSSSanitizer`` that allows the ``ALLOWED_STYLES`` properties.

    Returns ``None`` when the ``_USE_BLEACH_CSS_SANITIZER`` flag is not set.
    """
    if not _USE_BLEACH_CSS_SANITIZER:
        return None
    sanitizer = CSSSanitizer(allowed_css_properties=ALLOWED_STYLES)
    return sanitizer