1"""xmlWriter.py -- Simple XML authoring class"""
2
3from __future__ import annotations
4
5from typing import BinaryIO, Callable, TextIO
6from fontTools.misc.textTools import byteord, strjoin, tobytes, tostr
7import sys
8import os
9import string
10import logging
11import itertools
12
# Default whitespace written once per indentation level.
INDENT = " "
# Logger used by escape() to warn when illegal characters are replaced.
TTX_LOG = logging.getLogger("fontTools.ttx")
# Substitute written in place of any code point XML 1.0 does not allow.
REPLACEMENT = "?"
# str.translate() table mapping every disallowed code point to REPLACEMENT.
# TAB (0x09), LF (0x0A) and CR (0x0D) are deliberately absent from the table.
ILLEGAL_XML_CHARS = {
    codepoint: REPLACEMENT
    for codepoint in itertools.chain(
        range(0x00, 0x09),  # control characters below TAB
        (0x0B, 0x0C),  # the two code points between LF and CR
        range(0x0E, 0x20),  # control characters above CR, below space
        range(0xD800, 0xE000),  # surrogate code points
        (0xFFFE, 0xFFFF),  # last two code points of the BMP
    )
}
26
27
class XMLWriter(object):
    """Write an indented, UTF-8 XML document to a file path or stream.

    Constructing the writer emits the XML declaration followed by a newline.
    Callers then combine the small primitives (``begintag``/``endtag``,
    ``simpletag``, ``write``, ``comment``, ``newline`` ...) to build the
    document.  Indentation is written lazily: the first indented write after
    a ``newline()`` is prefixed with ``indentlevel`` copies of the indent
    string.

    The class is a context manager; leaving the ``with`` block calls
    ``close()``.
    """

    def __init__(
        self,
        fileOrPath: str | os.PathLike[str] | BinaryIO | TextIO,
        indentwhite: str = INDENT,
        idlefunc: Callable[[], None] | None = None,
        encoding: str = "utf_8",
        newlinestr: str | bytes | None = "\n",
    ) -> None:
        """Open the output and write the XML declaration.

        Args:
            fileOrPath: A path to create (opened in binary write mode and
                closed by ``close()``), the string ``"-"`` for ``sys.stdout``,
                or any object with a ``write`` method (left open by
                ``close()``).
            indentwhite: Whitespace written once per indentation level.
            idlefunc: Optional callback invoked from ``newline()`` on every
                100th call, starting with the first.
            encoding: Must spell UTF-8 (case, ``-`` and ``_`` are ignored).
            newlinestr: Line terminator; ``None`` selects ``os.linesep``.

        Raises:
            Exception: If ``encoding`` is not a spelling of UTF-8.
            TypeError: If ``fileOrPath`` is neither writable nor a path.
        """
        if encoding.lower().replace("-", "").replace("_", "") != "utf8":
            raise Exception("Only UTF-8 encoding is supported.")
        if fileOrPath == "-":
            fileOrPath = sys.stdout
        self.filename: str | os.PathLike[str] | None
        if not hasattr(fileOrPath, "write"):
            if not isinstance(fileOrPath, (str, os.PathLike)):
                raise TypeError(
                    "fileOrPath must be a file path (str or PathLike) if it isn't an object with a `write` method."
                )
            self.filename = fileOrPath
            self.file = open(fileOrPath, "wb")
            # We opened the file, so close() is responsible for closing it.
            self._closeStream = True
        else:
            self.filename = None
            # assume writable file object
            self.file = fileOrPath
            self._closeStream = False

        # Figure out if writer expects bytes or unicodes
        try:
            # The bytes check should be first. See:
            # https://github.com/fonttools/fonttools/pull/233
            self.file.write(b"")
            self.totype = tobytes
        except TypeError:
            # This better not fail.
            self.file.write("")
            self.totype = tostr
        self.indentwhite = self.totype(indentwhite)
        if newlinestr is None:
            self.newlinestr = self.totype(os.linesep)
        else:
            self.newlinestr = self.totype(newlinestr)
        self.indentlevel = 0
        # Names of the currently open tags, innermost last.
        self.stack = []
        # Truthy when the next indented write must emit indentation first.
        self.needindent = 1
        self.idlefunc = idlefunc
        self.idlecounter = 0
        self._writeraw('<?xml version="1.0" encoding="UTF-8"?>')
        self.newline()

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        """Close the writer; exceptions are not suppressed."""
        self.close()

    def close(self) -> None:
        """Close the underlying file if this writer opened it from a path."""
        if self._closeStream:
            assert not isinstance(self.file, (str, os.PathLike))
            self.file.close()

    def write(self, string, indent=True):
        """Writes text."""
        self._writeraw(escape(string), indent=indent)

    def writecdata(self, string):
        """Writes text in a CDATA section.

        A CDATA section ends at the first ``]]>``, so any occurrence inside
        *string* is split across two adjacent sections; a parser reading the
        result sees the original text.
        """
        safe = string.replace("]]>", "]]]]><![CDATA[>")
        self._writeraw("<![CDATA[" + safe + "]]>")

    def write8bit(self, data, strip=False):
        """Writes a bytes() sequence into the XML, escaping
        non-ASCII bytes.  When this is read in xmlReader,
        the original bytes can be recovered by encoding to
        'latin-1'."""
        self._writeraw(escape8bit(data), strip=strip)

    def write_noindent(self, string):
        """Writes text without indentation."""
        self._writeraw(escape(string), indent=False)

    def _writeraw(self, data, indent=True, strip=False):
        """Write *data* without escaping, possibly indented.

        Indentation is emitted only when *indent* is true and a newline was
        written since the last indented write.  With *strip* true, leading
        and trailing whitespace is removed from the converted data.
        """
        if indent and self.needindent:
            self.file.write(self.indentlevel * self.indentwhite)
            self.needindent = 0
        s = self.totype(data, encoding="utf_8")
        if strip:
            s = s.strip()
        self.file.write(s)

    def newline(self):
        """Write the line terminator and arm indentation for the next write."""
        self.file.write(self.newlinestr)
        self.needindent = 1
        idlecounter = self.idlecounter
        # Call the idle callback on every 100th newline (counter 0, 100, ...).
        if not idlecounter % 100 and self.idlefunc is not None:
            self.idlefunc()
        self.idlecounter = idlecounter + 1

    def comment(self, data):
        """Write *data* (escaped) as an XML comment, one output line per
        input line."""
        data = escape(data)
        lines = data.split("\n")
        self._writeraw("<!-- " + lines[0])
        for line in lines[1:]:
            self.newline()
            self._writeraw(" " + line)
        self._writeraw(" -->")

    def simpletag(self, _TAG_, *args, **kwargs):
        """Write an empty-element tag ``<_TAG_ .../>``.

        Attributes are given as for ``stringifyattrs``.
        """
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s/>" % (_TAG_, attrdata)
        self._writeraw(data)

    def begintag(self, _TAG_, *args, **kwargs):
        """Write a start tag, remember it as open and increase the indent.

        Attributes are given as for ``stringifyattrs``.
        """
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s>" % (_TAG_, attrdata)
        self._writeraw(data)
        self.stack.append(_TAG_)
        self.indent()

    def endtag(self, _TAG_):
        """Write the end tag for the innermost open element.

        *_TAG_* must match the most recent unmatched ``begintag``.
        """
        assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
        del self.stack[-1]
        self.dedent()
        data = "</%s>" % _TAG_
        self._writeraw(data)

    def dumphex(self, data):
        """Write *data* as hex, 16 input items per line in groups of 8 hex
        digits, each line followed by a newline."""
        linelength = 16
        hexlinelength = linelength * 2
        chunksize = 8
        for i in range(0, len(data), linelength):
            hexline = hexStr(data[i : i + linelength])
            # Always join all groups, including empty ones on a short final
            # line, so the separators written match the fixed line layout.
            line = " ".join(
                hexline[j : j + chunksize]
                for j in range(0, hexlinelength, chunksize)
            )
            self._writeraw(line)
            self.newline()

    def indent(self):
        """Increase the indentation level by one."""
        self.indentlevel = self.indentlevel + 1

    def dedent(self):
        """Decrease the indentation level by one; it must be positive."""
        assert self.indentlevel > 0
        self.indentlevel = self.indentlevel - 1

    def stringifyattrs(self, *args, **kwargs):
        """Return the attribute portion of a tag, or ``""`` without attributes.

        Attributes are passed either as keyword arguments (written sorted by
        name) or as a single positional iterable of ``(name, value)`` pairs
        (written in the given order), not both.  Values that are not ``str``
        or ``bytes`` are converted with ``str()``; every value is escaped
        with ``escapeattr``.
        """
        if kwargs:
            assert not args
            attributes = sorted(kwargs.items())
        elif args:
            assert len(args) == 1
            attributes = args[0]
        else:
            return ""
        parts = []
        for attr, value in attributes:
            if not isinstance(value, (bytes, str)):
                value = str(value)
            parts.append(' %s="%s"' % (attr, escapeattr(value)))
        return "".join(parts)
191
192
def escape(data):
    """Escape characters not allowed in `XML 1.0 <https://www.w3.org/TR/xml/#NT-Char>`_.

    *data* is converted to ``str`` (decoding UTF-8 if needed).  The markup
    characters ``&``, ``<`` and ``>`` become entity references, a carriage
    return becomes the character reference ``&#13;``, and every code point
    listed in ``ILLEGAL_XML_CHARS`` is replaced by ``REPLACEMENT`` with a
    warning logged to ``TTX_LOG``.

    Returns the escaped ``str``.
    """
    data = tostr(data, "utf_8")
    # "&" must go first so the ampersands introduced by the following
    # replacements are not escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    # A literal CR would be normalized to LF by XML parsers; a character
    # reference survives parsing unchanged.
    data = data.replace("\r", "&#13;")

    newData = data.translate(ILLEGAL_XML_CHARS)
    if newData != data:
        # Keep the log message short: show at most the first maxLen characters.
        maxLen = 10
        preview = repr(data)
        if len(data) > maxLen:
            preview = repr(data[:maxLen])[1:-1] + "..."
        TTX_LOG.warning(
            "Illegal XML character(s) found; replacing offending string %r with %r",
            preview,
            REPLACEMENT,
        )
    return newData
213
214
def escapeattr(data):
    """Escape *data* for use inside a double-quoted XML attribute value.

    Applies ``escape`` and additionally replaces the double quote with the
    ``&quot;`` entity reference so the value cannot terminate the attribute.
    """
    data = escape(data)
    data = data.replace('"', "&quot;")
    return data
219
220
def escape8bit(data):
    """Return the bytes *data* as ASCII-only XML text.

    Input is a bytes sequence (it is decoded as latin-1, so every byte maps
    to the code point of the same value).  Code points 32 through 127 other
    than ``<``, ``&`` and ``>`` are kept as is; everything else is written
    as a decimal character reference such as ``&#233;``.
    """

    def escapechar(c):
        n = ord(c)
        if 32 <= n <= 127 and c not in "<&>":
            return c
        return f"&#{n};"

    return "".join(map(escapechar, data.decode("latin-1")))
232
233
def hexStr(s):
    """Return the items of *s* as a string of two hex digits per item.

    Each item is converted to an integer with ``byteord``; its high and low
    nibbles are looked up in ``string.hexdigits``.
    """
    digits = string.hexdigits
    pairs = []
    for item in s:
        value = byteord(item)
        pairs.append(digits[(value >> 4) & 0xF] + digits[value & 0xF])
    return "".join(pairs)