1###############################################################################
2#
3# XMLwriter - A base class for XlsxWriter classes.
4#
5# Used in conjunction with XlsxWriter.
6#
7# SPDX-License-Identifier: BSD-2-Clause
8#
9# Copyright (c) 2013-2025, John McNamara, jmcnamara@cpan.org
10#
11
12# pylint: disable=dangerous-default-value
13
14# Standard packages.
15import re
16from io import StringIO
17
# Compile the performance critical regular expressions once, at import time.

# A literal "_xHHHH_" sequence (four hex digits) that already appears in data.
re_control_chars_1 = re.compile(r"(_x[0-9a-fA-F]{4}_)")

# Control characters in the range \x00-\x1f, excluding tab (\x09) and
# newline (\x0a).
re_control_chars_2 = re.compile(r"([\x00-\x08\x0b-\x1f])")

# Any character that requires escaping in an attribute value. It is also used
# as the fast-path check for element data.
xml_escapes = re.compile('["&<>\n]')


class XMLwriter:
    """
    Simple XML writer class.

    Writes XML declarations, tags and escaped data to a text filehandle
    stored in ``self.fh``. The ``attributes`` argument accepted by the tag
    writers is an iterable of ``(key, value)`` pairs that are emitted in
    order.

    """

    def __init__(self):
        # The handle that all of the writer methods write to.
        self.fh = None
        # True only when _set_xml_writer() opened the handle itself, in which
        # case _xml_close() is responsible for closing it.
        self.internal_fh = False

    def _set_filehandle(self, filehandle):
        # Set the writer filehandle directly. Mainly for testing.
        # The handle is treated as externally owned and is never closed here.
        self.fh = filehandle
        self.internal_fh = False

    def _set_xml_writer(self, filename):
        # Set the XML writer filehandle for the object. A StringIO object is
        # used as-is and stays owned by the caller. Anything else is passed to
        # open() as the target of a UTF-8 text file owned by this object.
        if isinstance(filename, StringIO):
            self.internal_fh = False
            self.fh = filename
        else:
            self.internal_fh = True
            # The handle must outlive this method, so "with" can't be used.
            # pylint: disable-next=consider-using-with
            self.fh = open(filename, "w", encoding="utf-8")

    def _xml_close(self):
        # Close the XML filehandle if we created it. Handles supplied by the
        # caller are left open.
        if self.internal_fh:
            self.fh.close()

    def _xml_declaration(self):
        # Write the XML declaration.
        self.fh.write('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n')

    def _xml_start_tag(self, tag, attributes=()):
        # Write an XML start tag with optional attributes. Attribute values
        # are escaped; the tag name and attribute keys are written verbatim.
        for key, value in attributes:
            value = self._escape_attributes(value)
            tag += f' {key}="{value}"'

        self.fh.write(f"<{tag}>")

    def _xml_start_tag_unencoded(self, tag, attributes=()):
        # Write an XML start tag with optional, unencoded, attributes.
        # This is a minor speed optimization for elements that don't
        # need encoding.
        for key, value in attributes:
            tag += f' {key}="{value}"'

        self.fh.write(f"<{tag}>")

    def _xml_end_tag(self, tag):
        # Write an XML end tag.
        self.fh.write(f"</{tag}>")

    def _xml_empty_tag(self, tag, attributes=()):
        # Write an empty (self-closing) XML tag with optional attributes.
        # Attribute values are escaped.
        for key, value in attributes:
            value = self._escape_attributes(value)
            tag += f' {key}="{value}"'

        self.fh.write(f"<{tag}/>")

    def _xml_empty_tag_unencoded(self, tag, attributes=()):
        # Write an empty XML tag with optional, unencoded, attributes.
        # This is a minor speed optimization for elements that don't
        # need encoding.
        for key, value in attributes:
            tag += f' {key}="{value}"'

        self.fh.write(f"<{tag}/>")

    def _xml_data_element(self, tag, data, attributes=()):
        # Write an XML element containing data with optional attributes.
        # The data has XML characters and control characters escaped.

        # Keep the bare tag name for the closing tag, since "tag" is
        # extended with the attributes below.
        end_tag = tag

        for key, value in attributes:
            value = self._escape_attributes(value)
            tag += f' {key}="{value}"'

        data = self._escape_data(data)
        data = self._escape_control_characters(data)

        self.fh.write(f"<{tag}>{data}</{end_tag}>")

    def _xml_string_element(self, index, attributes=()):
        # Optimized tag writer for <c> cell string elements in the inner loop.
        # "index" is written, unescaped, as the <v> value of a t="s" cell.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        self.fh.write(f'<c{attr} t="s"><v>{index}</v></c>')

    def _xml_si_element(self, string, attributes=()):
        # Optimized tag writer for shared strings <si> elements. The
        # attributes are applied to the inner <t> element. XML characters in
        # the string are escaped but control characters are not handled here.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        string = self._escape_data(string)

        self.fh.write(f"<si><t{attr}>{string}</t></si>")

    def _xml_rich_si_element(self, string):
        # Optimized tag writer for shared strings <si> rich string elements.
        # The string is written verbatim, without any escaping.
        self.fh.write(f"<si>{string}</si>")

    def _xml_number_element(self, number, attributes=()):
        # Optimized tag writer for <c> cell number elements in the inner loop.
        # The number is formatted with 16 significant digits (".16G").
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        self.fh.write(f"<c{attr}><v>{number:.16G}</v></c>")

    def _xml_formula_element(self, formula, result, attributes=()):
        # Optimized tag writer for <c> cell formula elements in the inner loop.
        # Both the formula and its result are escaped as element data.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        formula = self._escape_data(formula)
        result = self._escape_data(result)
        self.fh.write(f"<c{attr}><f>{formula}</f><v>{result}</v></c>")

    def _xml_inline_string(self, string, preserve, attributes=()):
        # Optimized tag writer for inlineStr cell elements in the inner loop.
        attr = ""
        t_attr = ""

        # Set the <t> attribute to preserve whitespace.
        if preserve:
            t_attr = ' xml:space="preserve"'

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        string = self._escape_data(string)

        self.fh.write(f'<c{attr} t="inlineStr"><is><t{t_attr}>{string}</t></is></c>')

    def _xml_rich_inline_string(self, string, attributes=()):
        # Optimized tag writer for rich inlineStr in the inner loop.
        # The string is written verbatim, without any escaping.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += f' {key}="{value}"'

        self.fh.write(f'<c{attr} t="inlineStr"><is>{string}</is></c>')

    def _escape_attributes(self, attribute):
        # Escape XML characters in attributes. Values that contain nothing to
        # escape, and non-string values (for which the regex search raises
        # TypeError), are returned unchanged.
        try:
            if not xml_escapes.search(attribute):
                return attribute
        except TypeError:
            return attribute

        # The ampersand must be replaced first so that the ampersands
        # introduced by the other entities are not escaped a second time.
        attribute = (
            attribute.replace("&", "&amp;")
            .replace('"', "&quot;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace("\n", "&#xA;")
        )
        return attribute

    def _escape_data(self, data):
        # Escape XML characters in data sections of tags. Note, this
        # is different from _escape_attributes() in that double quotes
        # are not escaped by Excel. Newlines are also left as they are.
        # Non-string values are returned unchanged.
        try:
            if not xml_escapes.search(data):
                return data
        except TypeError:
            return data

        # As above, the ampersand must be replaced first.
        data = data.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
        return data

    @staticmethod
    def _escape_control_characters(data):
        # Excel escapes control characters with _xHHHH_ and also escapes any
        # literal strings of that type by encoding the leading underscore.
        # So "\0" -> _x0000_ and "_x0000_" -> _x005F_x0000_.
        # The following substitutions deal with those cases.
        # Non-string values are returned unchanged.
        try:
            # Escape the escape.
            data = re_control_chars_1.sub(r"_x005F\1", data)
        except TypeError:
            return data

        # Convert control character to the _xHHHH_ escape.
        data = re_control_chars_2.sub(
            lambda match: f"_x{ord(match.group(1)):04X}_", data
        )

        # Escapes non characters in strings.
        data = data.replace("\ufffe", "_xFFFE_").replace("\uffff", "_xFFFF_")

        return data
235 return data