1"""xmlWriter.py -- Simple XML authoring class"""
2
3from fontTools.misc.textTools import byteord, strjoin, tobytes, tostr
4import sys
5import os
6import string
7
INDENT = " "  # default whitespace unit written once per indentation level


class XMLWriter(object):
    """Incremental writer that emits UTF-8 XML to a path or a writable stream.

    The XML declaration is written (followed by a newline) as soon as the
    writer is constructed. Elements are produced with ``begintag``,
    ``endtag`` and ``simpletag``; the writer keeps a stack of open tags and
    the current indentation level. Line breaks are never implicit: callers
    must invoke ``newline`` themselves, and indentation is emitted lazily
    before the first write that follows a newline.

    Instances are context managers; leaving the ``with`` block calls
    ``close``.
    """

    def __init__(
        self,
        fileOrPath,
        indentwhite=INDENT,
        idlefunc=None,
        encoding="utf_8",
        newlinestr="\n",
    ):
        """Open the output and write the XML declaration.

        Args:
            fileOrPath: ``"-"`` selects ``sys.stdout``. Any object with a
                ``write`` attribute is used as-is and is NOT closed by
                ``close``. Anything else is treated as a path, opened in
                binary write mode, and closed by ``close``.
            indentwhite: text written once per indentation level.
            idlefunc: optional zero-argument callable, invoked from
                ``newline`` on every 100th call (including the first one,
                which happens during construction).
            encoding: must spell UTF-8 (case, ``-`` and ``_`` are ignored).
            newlinestr: line terminator; ``None`` selects ``os.linesep``.

        Raises:
            Exception: if ``encoding`` is anything other than UTF-8.
        """
        if encoding.lower().replace("-", "").replace("_", "") != "utf8":
            raise Exception("Only UTF-8 encoding is supported.")
        if fileOrPath == "-":
            fileOrPath = sys.stdout
        if hasattr(fileOrPath, "write"):
            # Caller-owned stream: assume it is writable and never close it.
            self.filename = None
            self.file = fileOrPath
            self._closeStream = False
        else:
            self.filename = fileOrPath
            self.file = open(fileOrPath, "wb")
            self._closeStream = True

        try:
            # Figure out if writer expects bytes or unicodes
            try:
                # The bytes check should be first. See:
                # https://github.com/fonttools/fonttools/pull/233
                self.file.write(b"")
                self.totype = tobytes
            except TypeError:
                # This better not fail.
                self.file.write("")
                self.totype = tostr
            self.indentwhite = self.totype(indentwhite)
            if newlinestr is None:
                self.newlinestr = self.totype(os.linesep)
            else:
                self.newlinestr = self.totype(newlinestr)
            self.indentlevel = 0
            self.stack = []  # names of the currently open tags
            self.needindent = 1  # truthy right after a newline
            self.idlefunc = idlefunc
            self.idlecounter = 0
            self._writeraw('<?xml version="1.0" encoding="UTF-8"?>')
            self.newline()
        except BaseException:
            # Do not leak a file we opened ourselves when setup fails
            # (e.g. a bad indentwhite value or a raising idlefunc).
            self.close()
            raise

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        """Close the writer; exceptions from the block are not suppressed."""
        self.close()

    def close(self):
        """Close the underlying file, but only if this writer opened it."""
        if self._closeStream:
            self.file.close()

    def write(self, string, indent=True):
        """Writes text."""
        self._writeraw(escape(string), indent=indent)

    def writecdata(self, string):
        """Writes text in a CDATA section.

        The sequence ``]]>`` terminates a CDATA section and therefore cannot
        appear inside one; every occurrence is split across two adjacent
        sections so that the output stays well-formed.
        """
        safe = string.replace("]]>", "]]]]><![CDATA[>")
        self._writeraw("<![CDATA[" + safe + "]]>")

    def write8bit(self, data, strip=False):
        """Writes a bytes() sequence into the XML, escaping
        non-ASCII bytes.  When this is read in xmlReader,
        the original bytes can be recovered by encoding to
        'latin-1'."""
        self._writeraw(escape8bit(data), strip=strip)

    def write_noindent(self, string):
        """Writes text without indentation."""
        self._writeraw(escape(string), indent=False)

    def _writeraw(self, data, indent=True, strip=False):
        """Write *data* without escaping, possibly indented.

        Pending indentation is emitted first when *indent* is true and a
        newline was the last thing written. *data* is converted with
        ``self.totype`` to whatever the stream accepts and, if *strip* is
        true, stripped of surrounding whitespace before being written.
        """
        if indent and self.needindent:
            self.file.write(self.indentlevel * self.indentwhite)
            self.needindent = 0
        s = self.totype(data, encoding="utf_8")
        if strip:
            s = s.strip()
        self.file.write(s)

    def newline(self):
        """Write the line terminator and arm indentation for the next write.

        Also drives the idle callback: ``idlefunc`` (when set) is invoked
        whenever the pre-increment newline counter is a multiple of 100.
        """
        self.file.write(self.newlinestr)
        self.needindent = 1
        idlecounter = self.idlecounter
        if not idlecounter % 100 and self.idlefunc is not None:
            self.idlefunc()
        self.idlecounter = idlecounter + 1

    def comment(self, data):
        """Write *data* as an XML comment, one output line per input line.

        The text is passed through ``escape`` and split on ``"\\n"``;
        continuation lines are prefixed with a space.
        """
        lines = escape(data).split("\n")
        self._writeraw("<!-- " + lines[0])
        for line in lines[1:]:
            self.newline()
            self._writeraw(" " + line)
        self._writeraw(" -->")

    def simpletag(self, _TAG_, *args, **kwargs):
        """Write an empty element ``<_TAG_ .../>``.

        Attributes are given as for ``stringifyattrs``.
        """
        attrdata = self.stringifyattrs(*args, **kwargs)
        self._writeraw("<%s%s/>" % (_TAG_, attrdata))

    def begintag(self, _TAG_, *args, **kwargs):
        """Write the start tag ``<_TAG_ ...>`` and increase the indentation.

        The tag name is pushed on the stack so that ``endtag`` can verify
        proper nesting. Attributes are given as for ``stringifyattrs``.
        """
        attrdata = self.stringifyattrs(*args, **kwargs)
        self._writeraw("<%s%s>" % (_TAG_, attrdata))
        self.stack.append(_TAG_)
        self.indent()

    def endtag(self, _TAG_):
        """Write the end tag ``</_TAG_>`` and decrease the indentation.

        Raises:
            AssertionError: if *_TAG_* is not the most recently opened tag.
        """
        assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
        self.stack.pop()
        self.dedent()
        self._writeraw("</%s>" % _TAG_)

    def dumphex(self, data):
        """Write *data* as hex text, 16 items per line, each line terminated.

        Every line is made of four slots of 8 hex digits joined by single
        spaces (assuming ``hexStr`` yields two digits per item). A short
        final line keeps its separators, so it ends with trailing spaces.
        """
        linelength = 16
        hexlinelength = linelength * 2
        chunksize = 8
        for start in range(0, len(data), linelength):
            hexline = hexStr(data[start : start + linelength])
            chunks = (
                hexline[pos : pos + chunksize]
                for pos in range(0, hexlinelength, chunksize)
            )
            self._writeraw(" ".join(chunks))
            self.newline()

    def indent(self):
        """Increase the indentation level by one."""
        self.indentlevel = self.indentlevel + 1

    def dedent(self):
        """Decrease the indentation level by one; it must be positive."""
        assert self.indentlevel > 0
        self.indentlevel = self.indentlevel - 1

    def stringifyattrs(self, *args, **kwargs):
        """Return the attribute text for a tag, with a leading space each.

        Attributes are passed either as keyword arguments (written in sorted
        name order) or as a single positional iterable of ``(name, value)``
        pairs (written in the given order), never both. Values that are
        neither ``str`` nor ``bytes`` are converted with ``str``; all values
        go through ``escapeattr``. Returns ``""`` when nothing is given.
        """
        if kwargs:
            assert not args
            attributes = sorted(kwargs.items())
        elif args:
            assert len(args) == 1
            attributes = args[0]
        else:
            return ""
        parts = []
        for attr, value in attributes:
            if not isinstance(value, (bytes, str)):
                value = str(value)
            parts.append(' %s="%s"' % (attr, escapeattr(value)))
        return "".join(parts)
168
169
def escape(data):
    """Escape *data* for use as XML character data.

    *data* is first passed through ``tostr(data, "utf_8")`` (presumably
    decoding bytes input as UTF-8), then the markup-significant characters
    are replaced:

    * ``&`` -> ``&amp;``
    * ``<`` -> ``&lt;``
    * ``>`` -> ``&gt;``
    * carriage return -> ``&#13;``

    Returns the escaped string.
    """
    data = tostr(data, "utf_8")
    # "&" must be handled first, otherwise the ampersands introduced by the
    # following replacements would themselves be escaped again.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    # XML parsers normalize a literal CR to LF on input; only a character
    # reference survives a round trip.
    data = data.replace("\r", "&#13;")
    return data
177
178
def escapeattr(data):
    """Escape *data* for use inside a double-quoted XML attribute value.

    Applies ``escape`` and additionally replaces every double quote with
    ``&quot;`` so the value cannot terminate the surrounding quotes.
    Returns the escaped string.
    """
    data = escape(data)
    data = data.replace('"', "&quot;")
    return data
183
184
def escape8bit(data):
    """Return the bytes *data* as ASCII-only XML text.

    *data* must be a bytes-like object with a ``decode`` method; it is
    decoded as Latin-1, so every byte maps to the code point of the same
    value. Characters with code points 32 through 127 are kept as they are,
    except for ``<``, ``&`` and ``>``; every other character is written as a
    decimal character reference ``&#N;``.
    """

    def escapechar(c):
        # Keep the plain ASCII range verbatim unless the character is markup.
        n = ord(c)
        if 32 <= n <= 127 and c not in "<&>":
            return c
        return "&#" + str(n) + ";"

    return "".join(map(escapechar, data.decode("latin-1")))
196
197
def hexStr(s):
    """Return *s* rendered as hexadecimal text, two digits per item.

    Each item of *s* is converted to an integer with ``byteord`` (presumably
    accepting both ints and single characters); only the low 8 bits of that
    integer are represented, high nibble first, using the leading (lowercase)
    digits of ``string.hexdigits``.
    """
    digits = string.hexdigits
    codes = (byteord(item) for item in s)
    return "".join(
        digits[(code >> 4) & 0xF] + digits[code & 0xF] for code in codes
    )