1"""xmlWriter.py -- Simple XML authoring class"""
2
3from fontTools.misc.textTools import byteord, strjoin, tobytes, tostr
4import sys
5import os
6import string
7import logging
8import itertools
9
# Whitespace written once per nesting level by XMLWriter (its default
# ``indentwhite``).
INDENT = " "
# Logger that receives the warning about replaced illegal characters.
TTX_LOG = logging.getLogger("fontTools.ttx")
# Stand-in written in place of any code point that XML 1.0 forbids.
REPLACEMENT = "?"
# Translation table for ``str.translate``: every code point outside the
# XML 1.0 ``Char`` production maps to REPLACEMENT. Tab (0x09), line feed
# (0x0A) and carriage return (0x0D) are legal and therefore absent.
ILLEGAL_XML_CHARS = {
    codepoint: REPLACEMENT
    for codepoint in itertools.chain(
        range(0x00, 0x09),  # C0 controls below TAB
        (0x0B, 0x0C),  # vertical tab, form feed
        range(0x0E, 0x20),  # remaining C0 controls
        range(0xD800, 0xE000),  # UTF-16 surrogates
        (0xFFFE, 0xFFFF),  # non-characters
    )
}
23
24
class XMLWriter(object):
    """Stream-oriented writer producing an indented, UTF-8 XML document.

    The XML declaration is written as soon as the writer is constructed.
    Tags opened with :meth:`begintag` are pushed on a stack so that
    :meth:`endtag` can check nesting, and indentation is emitted lazily
    at the start of each line. The writer can be used as a context
    manager; leaving the ``with`` block calls :meth:`close`.
    """

    def __init__(
        self,
        fileOrPath,
        indentwhite=INDENT,
        idlefunc=None,
        encoding="utf_8",
        newlinestr="\n",
    ):
        """Open the output and write the XML declaration.

        Args:
            fileOrPath: ``"-"`` selects ``sys.stdout``; an object with a
                ``write`` attribute is used as-is and is *not* closed by
                :meth:`close`; anything else is passed to
                ``open(..., "wb")`` and closed by :meth:`close`.
            indentwhite: Text written once per indentation level.
            idlefunc: Optional callable invoked from :meth:`newline` on
                the first newline and on every 100th one after that.
            encoding: Must be a spelling of UTF-8 (case, ``-`` and ``_``
                are ignored).
            newlinestr: Line terminator; ``None`` selects ``os.linesep``.

        Raises:
            Exception: If ``encoding`` is not a spelling of UTF-8.
        """
        if encoding.lower().replace("-", "").replace("_", "") != "utf8":
            raise Exception("Only UTF-8 encoding is supported.")
        if fileOrPath == "-":
            fileOrPath = sys.stdout
        if not hasattr(fileOrPath, "write"):
            self.filename = fileOrPath
            self.file = open(fileOrPath, "wb")
            self._closeStream = True
        else:
            self.filename = None
            # assume writable file object
            self.file = fileOrPath
            self._closeStream = False

        # Figure out if writer expects bytes or unicodes
        try:
            # The bytes check should be first. See:
            # https://github.com/fonttools/fonttools/pull/233
            self.file.write(b"")
            self.totype = tobytes
        except TypeError:
            # This better not fail.
            self.file.write("")
            self.totype = tostr
        # Everything handed to self.file.write() goes through self.totype
        # so that it matches what the stream accepted in the probe above.
        self.indentwhite = self.totype(indentwhite)
        if newlinestr is None:
            self.newlinestr = self.totype(os.linesep)
        else:
            self.newlinestr = self.totype(newlinestr)
        self.indentlevel = 0
        self.stack = []
        self.needindent = 1
        self.idlefunc = idlefunc
        self.idlecounter = 0
        self._writeraw('<?xml version="1.0" encoding="UTF-8"?>')
        self.newline()

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        """Close the writer; exceptions are not suppressed."""
        self.close()

    def close(self):
        """Close the underlying file if this writer opened it."""
        if self._closeStream:
            self.file.close()

    def write(self, string, indent=True):
        """Writes text."""
        self._writeraw(escape(string), indent=indent)

    def writecdata(self, string):
        """Writes text in a CDATA section.

        A literal ``]]>`` inside ``string`` would end the section early,
        so each occurrence is split across two adjacent CDATA sections.
        """
        safe = string.replace("]]>", "]]]]><![CDATA[>")
        self._writeraw("<![CDATA[" + safe + "]]>")

    def write8bit(self, data, strip=False):
        """Writes a bytes() sequence into the XML, escaping
        non-ASCII bytes.  When this is read in xmlReader,
        the original bytes can be recovered by encoding to
        'latin-1'."""
        self._writeraw(escape8bit(data), strip=strip)

    def write_noindent(self, string):
        """Writes text without indentation."""
        self._writeraw(escape(string), indent=False)

    def _writeraw(self, data, indent=True, strip=False):
        """Write ``data`` without escaping it, possibly indented.

        Pending indentation is written first when ``indent`` is true and
        a newline was the last thing emitted. ``data`` is converted with
        ``self.totype`` and, if ``strip`` is true, stripped of leading
        and trailing whitespace before being written.
        """
        if indent and self.needindent:
            self.file.write(self.indentlevel * self.indentwhite)
            self.needindent = 0
        s = self.totype(data, encoding="utf_8")
        if strip:
            s = s.strip()
        self.file.write(s)

    def newline(self):
        """End the current line and arm indentation for the next write."""
        self.file.write(self.newlinestr)
        self.needindent = 1
        idlecounter = self.idlecounter
        # Give the caller's idle hook a turn once every 100 lines.
        if not idlecounter % 100 and self.idlefunc is not None:
            self.idlefunc()
        self.idlecounter = idlecounter + 1

    def comment(self, data):
        """Write ``data`` as an XML comment, one output line per input line."""
        data = escape(data)
        lines = data.split("\n")
        self._writeraw("<!-- " + lines[0])
        for line in lines[1:]:
            self.newline()
            self._writeraw(" " + line)
        self._writeraw(" -->")

    def simpletag(self, _TAG_, *args, **kwargs):
        """Write a self-closing tag; attributes as in :meth:`stringifyattrs`."""
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s/>" % (_TAG_, attrdata)
        self._writeraw(data)

    def begintag(self, _TAG_, *args, **kwargs):
        """Write an opening tag, remember it, and indent one level."""
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s>" % (_TAG_, attrdata)
        self._writeraw(data)
        self.stack.append(_TAG_)
        self.indent()

    def endtag(self, _TAG_):
        """Dedent and write the closing tag for the innermost open tag.

        Asserts that ``_TAG_`` matches the most recent :meth:`begintag`.
        """
        assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
        del self.stack[-1]
        self.dedent()
        data = "</%s>" % _TAG_
        self._writeraw(data)

    def dumphex(self, data):
        """Write ``data`` as hex text, 16 bytes per line in groups of 4.

        Every line is followed by a newline. A final short line still
        gets all three group separators, so it may end in spaces.
        """
        linelength = 16
        hexlinelength = linelength * 2
        chunksize = 8
        for i in range(0, len(data), linelength):
            hexline = hexStr(data[i : i + linelength])
            line = " ".join(
                hexline[j : j + chunksize]
                for j in range(0, hexlinelength, chunksize)
            )
            self._writeraw(line)
            self.newline()

    def indent(self):
        """Increase the indentation level by one."""
        self.indentlevel = self.indentlevel + 1

    def dedent(self):
        """Decrease the indentation level by one; it must be positive."""
        assert self.indentlevel > 0
        self.indentlevel = self.indentlevel - 1

    def stringifyattrs(self, *args, **kwargs):
        """Render attributes as `` name="value"`` pairs.

        Attributes are given either as keyword arguments, which are
        written sorted by name, or as one positional iterable of
        ``(name, value)`` pairs, written in the order given. Values that
        are neither ``bytes`` nor ``str`` are converted with ``str()``;
        all values are escaped with ``escapeattr``.

        Returns:
            The attribute text with a leading space before each pair, or
            ``""`` when no attributes were given.
        """
        if kwargs:
            assert not args
            attributes = sorted(kwargs.items())
        elif args:
            assert len(args) == 1
            attributes = args[0]
        else:
            return ""
        parts = []
        for attr, value in attributes:
            if not isinstance(value, (bytes, str)):
                value = str(value)
            parts.append(' %s="%s"' % (attr, escapeattr(value)))
        return "".join(parts)
182
183
def escape(data):
    """Escape characters not allowed in `XML 1.0 <https://www.w3.org/TR/xml/#NT-Char>`_.

    ``data`` is first converted with ``tostr(data, "utf_8")``. The markup
    characters ``&``, ``<`` and ``>`` are replaced by their predefined
    entity references and a carriage return by the character reference
    ``&#13;``. Any code point listed in ``ILLEGAL_XML_CHARS`` is then
    replaced by ``REPLACEMENT``, and a warning is logged on ``TTX_LOG``
    when that happens.

    Returns:
        The escaped text.
    """
    data = tostr(data, "utf_8")
    # "&" must be handled first so the entities added below are not
    # themselves re-escaped.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    # A literal CR would be normalized to LF by an XML parser; a
    # character reference survives the round trip.
    data = data.replace("\r", "&#13;")

    newData = data.translate(ILLEGAL_XML_CHARS)
    if newData != data:
        # Keep the log line short: show at most the first maxLen characters.
        maxLen = 10
        preview = repr(data)
        if len(data) > maxLen:
            preview = repr(data[:maxLen])[1:-1] + "..."
        TTX_LOG.warning(
            "Illegal XML character(s) found; replacing offending string %r with %r",
            preview,
            REPLACEMENT,
        )
    return newData
204
205
def escapeattr(data):
    """Escape ``data`` for use inside a double-quoted XML attribute value.

    Applies ``escape`` and then replaces every double quote with
    ``&quot;`` so the value cannot terminate the attribute early.

    Returns:
        The escaped text.
    """
    data = escape(data)
    data = data.replace('"', "&quot;")
    return data
210
211
def escape8bit(data):
    """Render ``data`` as text with unsafe characters as ``&#N;`` references.

    ``data`` is decoded as Latin-1, so it must provide ``.decode()``.
    Characters with code 32 through 127 other than ``<``, ``&`` and ``>``
    are kept as they are; every other character is written as a decimal
    numeric character reference.
    """

    def _render(char):
        code = ord(char)
        if code < 32 or code > 127 or char in "<&>":
            return "&#%d;" % code
        return char

    decoded = data.decode("latin-1")
    return strjoin(map(_render, decoded))
223
224
def hexStr(s):
    """Return two lowercase hex digits for each item of ``s``, concatenated.

    Each item is converted to an integer with ``byteord``; only its low
    eight bits contribute to the output.
    """
    digits = string.hexdigits
    pairs = []
    for item in s:
        value = byteord(item)
        high = digits[(value >> 4) & 0xF]
        low = digits[value & 0xF]
        pairs.append(high + low)
    return "".join(pairs)