Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/xlsxwriter/xmlwriter.py: 76%

###############################################################################
#
# XMLwriter - A base class for XlsxWriter classes.
#
# Used in conjunction with XlsxWriter.
#
# SPDX-License-Identifier: BSD-2-Clause
#

# pylint: disable=dangerous-default-value

14# Standard packages.

15import re

16from io import StringIO

# Compile performance critical regular expressions once, at import time.

# Matches literal text that already has the shape of an _xHHHH_ escape.
re_control_chars_1 = re.compile("(_x[0-9a-fA-F]{4}_)")

# Matches the control characters that are rewritten as _xHHHH_ escapes.
# Tab (\x09) and newline (\x0a) fall outside the two ranges.
re_control_chars_2 = re.compile(r"([\x00-\x08\x0b-\x1f])")

# Cheap pre-check used by the escape helpers so that strings containing
# none of these characters are returned without any replace() calls.
xml_escapes = re.compile('["&<>\n]')


class XMLwriter:
    """
    Simple XML writer class.

    Every ``_xml_*`` method writes one fragment of markup straight to
    ``self.fh``. The ``attributes`` parameters are iterables of
    ``(key, value)`` pairs; their list defaults are only ever iterated,
    never mutated, so sharing them between calls is harmless.

    """

    def __init__(self):
        # No filehandle until _set_filehandle() or _set_xml_writer() is used.
        self.fh = None
        # True only when this object opened the file itself (see _xml_close).
        self.internal_fh = False

    def _set_filehandle(self, filehandle):
        # Set the writer filehandle directly. Mainly for testing.
        # The handle belongs to the caller, so _xml_close() will not close it.
        self.fh = filehandle
        self.internal_fh = False

    def _set_xml_writer(self, filename):
        # Set the XML writer filehandle for the object.
        #
        # A StringIO instance is used as-is and is not closed by
        # _xml_close(). Anything else is passed to open() as a path and the
        # resulting file is owned (and later closed) by this object.
        if isinstance(filename, StringIO):
            self.internal_fh = False
            self.fh = filename
        else:
            self.internal_fh = True
            # The file must outlive this method, so "with" cannot be used.
            # pylint: disable-next=consider-using-with
            self.fh = open(filename, "w", encoding="utf-8")

    def _xml_close(self):
        # Close the XML filehandle if we created it.
        if self.internal_fh:
            self.fh.close()

    def _xml_declaration(self):
        # Write the XML declaration.
        self.fh.write('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n')

    def _xml_start_tag(self, tag, attributes=[]):
        # Write an XML start tag with optional attributes.
        # Attribute values are escaped; the tag name and keys are written
        # verbatim.
        for key, value in attributes:
            value = self._escape_attributes(value)
            tag += f' {key}="{value}"'

        self.fh.write(f"<{tag}>")

    def _xml_start_tag_unencoded(self, tag, attributes=[]):
        # Write an XML start tag with optional, unencoded, attributes.
        # This is a minor speed optimization for elements that don't
        # need encoding.
        for key, value in attributes:
            tag += f' {key}="{value}"'

        self.fh.write(f"<{tag}>")

    def _xml_end_tag(self, tag):
        # Write an XML end tag.
        self.fh.write(f"</{tag}>")

    def _xml_empty_tag(self, tag, attributes=[]):
        # Write an empty (self-closing) XML tag with optional attributes.
        # Attribute values are escaped.
        for key, value in attributes:
            value = self._escape_attributes(value)
            tag += f' {key}="{value}"'

        self.fh.write(f"<{tag}/>")

    def _xml_empty_tag_unencoded(self, tag, attributes=[]):
        # Write an empty XML tag with optional, unencoded, attributes.
        # This is a minor speed optimization for elements that don't
        # need encoding.
        for key, value in attributes:
            tag += f' {key}="{value}"'

        self.fh.write(f"<{tag}/>")

    def _xml_data_element(self, tag, data, attributes=[]):
        # Write an XML element containing data with optional attributes.
        # Keep the bare name for the closing tag before attributes are
        # appended to "tag".
        end_tag = tag

        for key, value in attributes:
            value = self._escape_attributes(value)
            tag += f' {key}="{value}"'

        # Markup characters first, then control characters as _xHHHH_.
        data = self._escape_data(data)
        data = self._escape_control_characters(data)

        self.fh.write(f"<{tag}>{data}</{end_tag}>")

    def _xml_string_element(self, index, attributes=[]):
        # Optimized tag writer for <c> cell string elements in the inner loop.
        # "index" is written unescaped inside <v>.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        self.fh.write(f'<c{attr} t="s"><v>{index}</v></c>')

    def _xml_si_element(self, string, attributes=[]):
        # Optimized tag writer for shared strings <si> elements.
        # The attributes are applied to the inner <t> element. Only markup
        # characters are escaped here; control characters are not rewritten
        # by this method.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        string = self._escape_data(string)

        self.fh.write(f"<si><t{attr}>{string}</t></si>")

    def _xml_rich_si_element(self, string):
        # Optimized tag writer for shared strings <si> rich string elements.
        # "string" is written verbatim, without any escaping.
        self.fh.write(f"<si>{string}</si>")

    def _xml_number_element(self, number, attributes=[]):
        # Optimized tag writer for <c> cell number elements in the inner loop.
        # The number is formatted with up to 16 significant digits.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        self.fh.write(f"<c{attr}><v>{number:.16G}</v></c>")

    def _xml_formula_element(self, formula, result, attributes=[]):
        # Optimized tag writer for <c> cell formula elements in the inner loop.
        # Both the formula text and its result have markup characters escaped.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        formula = self._escape_data(formula)
        result = self._escape_data(result)
        self.fh.write(f"<c{attr}><f>{formula}</f><v>{result}</v></c>")

    def _xml_inline_string(self, string, preserve, attributes=[]):
        # Optimized tag writer for inlineStr cell elements in the inner loop.
        attr = ""
        t_attr = ""

        # Set the <t> attribute to preserve whitespace.
        if preserve:
            t_attr = ' xml:space="preserve"'

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        string = self._escape_data(string)

        self.fh.write(f'<c{attr} t="inlineStr"><is><t{t_attr}>{string}</t></is></c>')

    def _xml_rich_inline_string(self, string, attributes=[]):
        # Optimized tag writer for rich inlineStr in the inner loop.
        # "string" is written verbatim, without any escaping.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        self.fh.write(f'<c{attr} t="inlineStr"><is>{string}</is></c>')

    def _escape_attributes(self, attribute):
        # Escape XML characters in attributes.
        #
        # Returns the value unchanged when it contains nothing to escape, or
        # when it is not a string (the regex search raises TypeError for
        # values such as numbers).
        try:
            if not xml_escapes.search(attribute):
                return attribute
        except TypeError:
            return attribute

        # "&" must be replaced first so that the ampersands introduced by
        # the other entities are not escaped a second time.
        attribute = (
            attribute.replace("&", "&amp;")
            .replace('"', "&quot;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace("\n", "&#xA;")
        )
        return attribute

    def _escape_data(self, data):
        # Escape XML characters in data sections of tags. Note, this
        # is different from _escape_attributes() in that double quotes
        # are not escaped by Excel. Newlines are also left as they are.
        #
        # Non-string values make the regex search raise TypeError and are
        # returned unchanged.
        try:
            if not xml_escapes.search(data):
                return data
        except TypeError:
            return data

        # "&" first, so the entities added afterwards are not re-escaped.
        data = data.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
        return data

    @staticmethod
    def _escape_control_characters(data):
        # Excel escapes control characters with _xHHHH_ and also escapes any
        # literal strings of that type by encoding the leading underscore.
        # So "\0" -> _x0000_ and "_x0000_" -> _x005F_x0000_.
        # The following substitutions deal with those cases.
        #
        # Non-string values are returned unchanged.
        try:
            # Escape the escape.
            data = re_control_chars_1.sub(r"_x005F\1", data)
        except TypeError:
            return data

        # Convert control character to the _xHHHH_ escape.
        data = re_control_chars_2.sub(
            lambda match: f"_x{ord(match.group(1)):04X}_", data
        )

        # Escapes non characters in strings.
        data = data.replace("\ufffe", "_xFFFE_").replace("\uffff", "_xFFFF_")

        return data