Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/xlsxwriter/xmlwriter.py: 76%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

122 statements  

1############################################################################### 

2# 

3# XMLwriter - A base class for XlsxWriter classes. 

4# 

5# Used in conjunction with XlsxWriter. 

6# 

7# SPDX-License-Identifier: BSD-2-Clause 

8# 

9# Copyright (c) 2013-2025, John McNamara, jmcnamara@cpan.org 

10# 

11 

12# pylint: disable=dangerous-default-value 

13 

14# Standard packages. 

15import re 

16from io import StringIO 

17 

# The patterns below are compiled once at import time because the escape
# routines that use them are performance critical.

# Any character that triggers escaping of XML attribute values or data.
xml_escapes = re.compile('["&<>\n]')

# A literal "_xHHHH_" sequence (H is a hex digit), captured as group 1.
re_control_chars_1 = re.compile("(_x[0-9a-fA-F]{4}_)")

# A single control character in the ranges 0x00-0x08 or 0x0B-0x1F, captured
# as group 1. Tab (0x09) and newline (0x0A) are deliberately not matched.
re_control_chars_2 = re.compile(r"([\x00-\x08\x0b-\x1f])")

22 

23 

class XMLwriter:
    """
    Minimal XML writer used as a base class by the XlsxWriter classes.

    Output is written as text to ``self.fh``, which is either a handle
    supplied by the caller or a file opened by ``_set_xml_writer()``.

    """

    def __init__(self):
        # No output handle until one of the _set_*() methods is called.
        self.fh = None
        # True only when this object opened the handle and must close it.
        self.internal_fh = False

    def _set_filehandle(self, filehandle):
        # Use a caller supplied filehandle as the output. Mainly for testing.
        # The handle is not owned by this object so it is never closed here.
        self.fh = filehandle
        self.internal_fh = False

    def _set_xml_writer(self, filename):
        # Attach the output handle for the object. A StringIO object is used
        # as-is and left for the caller to close. Anything else is treated as
        # a path and opened for writing as UTF-8 text.
        if isinstance(filename, StringIO):
            self.internal_fh = False
            self.fh = filename
            return

        self.internal_fh = True
        # pylint: disable-next=consider-using-with
        self.fh = open(filename, "w", encoding="utf-8")

    def _xml_close(self):
        # Close the output handle, but only if it was opened by this object.
        if not self.internal_fh:
            return

        self.fh.close()

    def _xml_declaration(self):
        # Write the standard XML declaration followed by a newline.
        self.fh.write('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n')

    def _xml_start_tag(self, tag, attributes=[]):
        # Write <tag key="value" ...> with escaped attribute values.
        # The attributes are an iterable of (key, value) pairs.
        escape = self._escape_attributes
        attr = "".join([f' {name}="{escape(val)}"' for name, val in attributes])

        self.fh.write(f"<{tag}{attr}>")

    def _xml_start_tag_unencoded(self, tag, attributes=[]):
        # Write <tag key="value" ...> without escaping the attribute values.
        # A small speed-up for elements whose values never need escaping.
        attr = "".join([f' {name}="{val}"' for name, val in attributes])

        self.fh.write(f"<{tag}{attr}>")

    def _xml_end_tag(self, tag):
        # Write the closing </tag>.
        self.fh.write(f"</{tag}>")

    def _xml_empty_tag(self, tag, attributes=[]):
        # Write a self-closing <tag key="value" .../> with escaped values.
        escape = self._escape_attributes
        attr = "".join([f' {name}="{escape(val)}"' for name, val in attributes])

        self.fh.write(f"<{tag}{attr}/>")

    def _xml_empty_tag_unencoded(self, tag, attributes=[]):
        # Write a self-closing <tag key="value" .../> without escaping the
        # attribute values. A small speed-up for elements that don't need it.
        attr = "".join([f' {name}="{val}"' for name, val in attributes])

        self.fh.write(f"<{tag}{attr}/>")

    def _xml_data_element(self, tag, data, attributes=[]):
        # Write <tag ...>data</tag>. Attribute values are escaped and the
        # data has both XML and control characters escaped.
        escape = self._escape_attributes
        attr = "".join([f' {name}="{escape(val)}"' for name, val in attributes])

        text = self._escape_control_characters(self._escape_data(data))

        self.fh.write(f"<{tag}{attr}>{text}</{tag}>")

    def _xml_string_element(self, index, attributes=[]):
        # Fast path for <c> cells that hold a string: the cell is written
        # with t="s" and the given index as its <v> value.
        escape = self._escape_attributes
        attr = "".join([f' {name}="{escape(val)}"' for name, val in attributes])

        self.fh.write(f'<c{attr} t="s"><v>{index}</v></c>')

    def _xml_si_element(self, string, attributes=[]):
        # Fast path for shared string <si> elements. The attributes are
        # applied to the inner <t> element and the string is XML escaped.
        escape = self._escape_attributes
        attr = "".join([f' {name}="{escape(val)}"' for name, val in attributes])

        text = self._escape_data(string)

        self.fh.write(f"<si><t{attr}>{text}</t></si>")

    def _xml_rich_si_element(self, string):
        # Fast path for rich shared string <si> elements. The string is
        # written verbatim, without any escaping.
        self.fh.write(f"<si>{string}</si>")

    def _xml_number_element(self, number, attributes=[]):
        # Fast path for <c> cells that hold a number. The value is formatted
        # with up to 16 significant digits.
        escape = self._escape_attributes
        attr = "".join([f' {name}="{escape(val)}"' for name, val in attributes])

        self.fh.write(f"<c{attr}><v>{number:.16G}</v></c>")

    def _xml_formula_element(self, formula, result, attributes=[]):
        # Fast path for <c> cells that hold a formula <f> and its result <v>.
        # Both the formula and the result are XML escaped.
        escape = self._escape_attributes
        attr = "".join([f' {name}="{escape(val)}"' for name, val in attributes])

        formula_text = self._escape_data(formula)
        result_text = self._escape_data(result)

        self.fh.write(f"<c{attr}><f>{formula_text}</f><v>{result_text}</v></c>")

    def _xml_inline_string(self, string, preserve, attributes=[]):
        # Fast path for inlineStr <c> cells. When preserve is true the <t>
        # element is flagged so that whitespace in the string is retained.
        t_attr = ' xml:space="preserve"' if preserve else ""

        escape = self._escape_attributes
        attr = "".join([f' {name}="{escape(val)}"' for name, val in attributes])

        text = self._escape_data(string)

        self.fh.write(f'<c{attr} t="inlineStr"><is><t{t_attr}>{text}</t></is></c>')

    def _xml_rich_inline_string(self, string, attributes=[]):
        # Fast path for rich inlineStr <c> cells. The string is written
        # verbatim inside <is>, without any escaping.
        escape = self._escape_attributes
        attr = "".join([f' {name}="{escape(val)}"' for name, val in attributes])

        self.fh.write(f'<c{attr} t="inlineStr"><is>{string}</is></c>')

    def _escape_attributes(self, attribute):
        # Escape the XML special characters in an attribute value. Values
        # that can't be searched as strings (numbers, for example) and
        # strings with nothing to escape are returned unchanged.
        try:
            found = xml_escapes.search(attribute)
        except TypeError:
            return attribute

        if found is None:
            return attribute

        # The ampersand must be replaced first so that the entities added by
        # the later replacements are not escaped a second time.
        replacements = (
            ("&", "&amp;"),
            ('"', "&quot;"),
            ("<", "&lt;"),
            (">", "&gt;"),
            ("\n", "&#xA;"),
        )
        for char, entity in replacements:
            attribute = attribute.replace(char, entity)

        return attribute

    def _escape_data(self, data):
        # Escape the XML special characters in element data. Unlike
        # _escape_attributes() only &, < and > are replaced: double quotes
        # are not escaped by Excel, and newlines are also left as they are.
        try:
            found = xml_escapes.search(data)
        except TypeError:
            return data

        if found is None:
            return data

        # The ampersand goes first so that "&lt;" and "&gt;" stay intact.
        for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
            data = data.replace(char, entity)

        return data

    @staticmethod
    def _escape_control_characters(data):
        # Excel represents control characters as _xHHHH_ and protects literal
        # text of that form by encoding its leading underscore. For example
        # "\0" becomes _x0000_ and "_x0000_" becomes _x005F_x0000_.
        # Data that isn't a string is returned unchanged.
        try:
            # Protect any existing _xHHHH_ text before new escapes are added.
            data = re_control_chars_1.sub(r"_x005F\1", data)
        except TypeError:
            return data

        def as_excel_escape(match):
            # Render the matched control character as _xHHHH_.
            return f"_x{ord(match.group(1)):04X}_"

        data = re_control_chars_2.sub(as_excel_escape, data)

        # The two Unicode non-characters are escaped in the same style.
        return data.replace("\ufffe", "_xFFFE_").replace("\uffff", "_xFFFF_")