Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/html5lib/treebuilders/etree.py: 60%

247 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:18 +0000

1from __future__ import absolute_import, division, unicode_literals 

2# pylint:disable=protected-access 

3 

4from six import text_type 

5 

6import re 

7 

8from copy import copy 

9 

10from . import base 

11from .. import _ihatexml 

12from .. import constants 

13from ..constants import namespaces 

14from .._utils import moduleFactoryFactory 

15 

# Matches an ElementTree qualified name in "{namespace}local" form and
# captures (namespace, local).  Names without a leading "{...}" part do not
# match at all, which callers use to detect un-namespaced names.
tag_regexp = re.compile(
    "{([^}]*)}(.*)"
)

17 

18 

19def getETreeBuilder(ElementTreeImplementation, fullTree=False): 

20 ElementTree = ElementTreeImplementation 

21 ElementTreeCommentType = ElementTree.Comment("asd").tag 

22 

class Element(base.Node):
    """Tree-builder node that wraps a single ElementTree element.

    Two parallel views of the children are maintained and must stay in
    step: ``_childNodes`` holds the wrapper nodes, while ``_element`` is the
    backing ElementTree element whose children, ``text`` and ``tail``
    carry the actual content.
    """

    def __init__(self, name, namespace=None):
        """Create the wrapper and its backing ElementTree element.

        ``name`` is the local tag name; ``namespace`` is the namespace URI
        or None.  When no namespace is given, ``nameTuple`` falls back to
        the HTML namespace while the ElementTree tag stays un-namespaced.
        """
        self._name = name
        self._namespace = namespace
        self._element = ElementTree.Element(self._getETreeTag(name,
                                                              namespace))
        if namespace is None:
            self.nameTuple = namespaces["html"], self._name
        else:
            self.nameTuple = self._namespace, self._name
        self.parent = None
        self._childNodes = []
        self._flags = []

    def _getETreeTag(self, name, namespace):
        """Return the ElementTree tag: ``name`` or ``{namespace}name``."""
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s" % (namespace, name)
        return etree_tag

    def _setName(self, name):
        # Keep the backing element's tag in step with the wrapper's name.
        self._name = name
        self._element.tag = self._getETreeTag(self._name, self._namespace)

    def _getName(self):
        return self._name

    name = property(_getName, _setName)

    def _setNamespace(self, namespace):
        # Keep the backing element's tag in step with the new namespace.
        self._namespace = namespace
        self._element.tag = self._getETreeTag(self._name, self._namespace)

    def _getNamespace(self):
        return self._namespace

    namespace = property(_getNamespace, _setNamespace)

    def _getAttributes(self):
        """Return the backing element's live attribute mapping."""
        return self._element.attrib

    def _setAttributes(self, attributes):
        """Replace all attributes of the backing element.

        Tuple keys are stored as ``{key[2]}key[1]``; presumably the tuple is
        (prefix, local name, namespace) -- confirm against the caller.  Any
        other key is used unchanged.
        """
        el_attrib = self._element.attrib
        el_attrib.clear()
        if attributes:
            # calling .items _always_ allocates, and the above truthy check is cheaper than the
            # allocation on average
            for key, value in attributes.items():
                if isinstance(key, tuple):
                    name = "{%s}%s" % (key[2], key[1])
                else:
                    name = key
                el_attrib[name] = value

    attributes = property(_getAttributes, _setAttributes)

    def _getChildNodes(self):
        return self._childNodes

    def _setChildNodes(self, value):
        """Replace every child with the nodes in ``value``."""
        del self._element[:]
        self._childNodes = []
        for element in value:
            # appendChild is the method this class actually defines; it
            # updates both _childNodes and the backing element.
            self.appendChild(element)

    childNodes = property(_getChildNodes, _setChildNodes)

    def hasContent(self):
        """Return true if the node has children or text"""
        return bool(self._element.text or len(self._element))

    def appendChild(self, node):
        """Append ``node`` as the last child in both views of the tree."""
        self._childNodes.append(node)
        self._element.append(node._element)
        node.parent = self

    def insertBefore(self, node, refNode):
        """Insert ``node`` immediately before the existing child ``refNode``.

        Raises ValueError when ``refNode``'s element is not a child of this
        node's element.
        """
        index = list(self._element).index(refNode._element)
        self._element.insert(index, node._element)
        # Mirror the insertion in the wrapper list; otherwise childNodes
        # goes stale and a later removeChild(node) fails in list.remove().
        try:
            child_index = self._childNodes.index(refNode)
        except ValueError:
            # The wrapper list was already out of step; fall back to the
            # element position (list.insert clamps an oversized index).
            child_index = index
        self._childNodes.insert(child_index, node)
        node.parent = self

    def removeChild(self, node):
        """Detach ``node`` from both views of the tree."""
        self._childNodes.remove(node)
        self._element.remove(node._element)
        node.parent = None

    def insertText(self, data, insertBefore=None):
        """Add text ``data`` at the end, or before child ``insertBefore``.

        ElementTree has no text nodes, so the text is appended to this
        element's ``text`` or to the ``tail`` of the preceding child.
        """
        if not len(self._element):
            if not self._element.text:
                self._element.text = ""
            self._element.text += data
        elif insertBefore is None:
            # Insert the text as the tail of the last child element
            if not self._element[-1].tail:
                self._element[-1].tail = ""
            self._element[-1].tail += data
        else:
            # Insert the text before the specified node
            children = list(self._element)
            index = children.index(insertBefore._element)
            if index > 0:
                if not self._element[index - 1].tail:
                    self._element[index - 1].tail = ""
                self._element[index - 1].tail += data
            else:
                if not self._element.text:
                    self._element.text = ""
                self._element.text += data

    def cloneNode(self):
        """Return a new node with the same name, namespace and attributes.

        Children and text are not copied.
        """
        element = type(self)(self.name, self.namespace)
        if self._element.attrib:
            element._element.attrib = copy(self._element.attrib)
        return element

    def reparentChildren(self, newParent):
        """Move this node's leading text and all children to ``newParent``."""
        text = self._element.text
        if newParent.childNodes:
            # The text has to follow newParent's current last child, i.e. go
            # into that child's tail.  Both tail and text may be None, so
            # normalise before concatenating instead of using a bare +=.
            if text:
                last = newParent.childNodes[-1]._element
                last.tail = (last.tail or "") + text
        else:
            if not newParent._element.text:
                newParent._element.text = ""
            if text is not None:
                newParent._element.text += text
        self._element.text = ""
        base.Node.reparentChildren(self, newParent)

149 

class Comment(Element):
    """Comment node backed by an ElementTree comment element.

    Element.__init__ is deliberately not invoked here: the wrapper state is
    set up by hand, so no ``_name`` or ``_namespace`` attribute exists on a
    comment node.
    """

    def __init__(self, data):
        # Wrapper bookkeeping, initialised the same way Element does it.
        self._flags = []
        self._childNodes = []
        self.parent = None
        # The comment text itself lives on the ElementTree comment node.
        self._element = ElementTree.Comment(data)

    def _getData(self):
        """Return the comment text."""
        return self._element.text

    def _setData(self, value):
        """Replace the comment text."""
        self._element.text = value

    data = property(_getData, _setData)

166 

class DocumentType(Element):
    """Doctype node, stored as a pseudo-element tagged ``<!DOCTYPE>``.

    The doctype name is kept in the element's text; the public and system
    identifiers are kept as the ``publicId`` / ``systemId`` attributes.
    """

    def __init__(self, name, publicId, systemId):
        Element.__init__(self, "<!DOCTYPE>")
        self._element.text = name
        # Routed through the properties below, so None leaves them unset.
        self.publicId = publicId
        self.systemId = systemId

    def _getPublicId(self):
        """Return the public identifier, or "" when none was stored."""
        return self._element.get("publicId", "")

    def _setPublicId(self, value):
        """Store the public identifier; None is ignored."""
        if value is None:
            return
        self._element.set("publicId", value)

    publicId = property(_getPublicId, _setPublicId)

    def _getSystemId(self):
        """Return the system identifier, or "" when none was stored."""
        return self._element.get("systemId", "")

    def _setSystemId(self, value):
        """Store the system identifier; None is ignored."""
        if value is None:
            return
        self._element.set("systemId", value)

    systemId = property(_getSystemId, _setSystemId)

191 

class Document(Element):
    """Document node: an Element whose tag is the ``DOCUMENT_ROOT`` marker."""

    def __init__(self):
        # The marker tag is what the serializers in this file test for.
        Element.__init__(self, name="DOCUMENT_ROOT")

195 

class DocumentFragment(Element):
    """Fragment container: an Element tagged ``DOCUMENT_FRAGMENT``."""

    def __init__(self):
        Element.__init__(self, name="DOCUMENT_FRAGMENT")

199 

def testSerializer(element):
    """Return an indented, line-per-node dump of ``element`` and its subtree.

    Accepts an element or any object without a ``tag`` attribute that offers
    ``getroot()``.  Raises TypeError if a document root carries a tail or
    attributes, and KeyError if a namespace has no entry in
    ``constants.prefixes``.
    """
    lines = []

    def readable(qname):
        # "{namespace}local" becomes "prefix local"; other names pass through.
        found = tag_regexp.match(qname)
        if found is None:
            return qname
        uri, local = found.groups()
        return "%s %s" % (constants.prefixes[uri], local)

    def walk(node, depth=0):
        if not hasattr(node, "tag"):
            node = node.getroot()
        tag = node.tag

        if tag == "<!DOCTYPE>":
            public_id = node.get("publicId")
            system_id = node.get("systemId")
            if public_id or system_id:
                lines.append("""<!DOCTYPE %s "%s" "%s">""" %
                             (node.text, public_id or "", system_id or ""))
            else:
                lines.append("<!DOCTYPE %s>" % (node.text,))
        elif tag == "DOCUMENT_ROOT":
            lines.append("#document")
            if node.text is not None:
                lines.append("|%s\"%s\"" % (' ' * (depth + 2), node.text))
            if node.tail is not None:
                raise TypeError("Document node cannot have tail")
            if hasattr(node, "attrib") and len(node.attrib):
                raise TypeError("Document node cannot have attributes")
        elif tag == ElementTreeCommentType:
            lines.append("|%s<!-- %s -->" % (' ' * depth, node.text))
        else:
            assert isinstance(tag, text_type), \
                "Expected unicode, got %s, %s" % (type(tag), tag)
            lines.append("|%s<%s>" % (' ' * depth, readable(tag)))

            if hasattr(node, "attrib"):
                # Attributes are listed sorted by their displayed name.
                pairs = sorted((readable(key), val)
                               for key, val in node.attrib.items())
                for key, val in pairs:
                    lines.append('|%s%s="%s"' % (' ' * (depth + 2), key, val))
            if node.text:
                lines.append("|%s\"%s\"" % (' ' * (depth + 2), node.text))

        # Children sit one level (two spaces) deeper; the tail belongs at
        # the node's own level.
        for child in node:
            walk(child, depth + 2)
        if node.tail:
            lines.append("|%s\"%s\"" % (' ' * depth, node.tail))

    walk(element, 0)

    return "\n".join(lines)

261 

def tostring(element):  # pylint:disable=unused-variable
    """Serialize an element and its child nodes to a string

    Accepts an element or an ``ElementTree.ElementTree`` (its root is
    used).  Raises TypeError if a document root carries a tail or
    attributes.
    """
    rv = []
    # Named to avoid shadowing the ``filter`` builtin.
    infoset_filter = _ihatexml.InfosetFilter()

    def serializeElement(element):
        if isinstance(element, ElementTree.ElementTree):
            element = element.getroot()

        if element.tag == "<!DOCTYPE>":
            if element.get("publicId") or element.get("systemId"):
                publicId = element.get("publicId") or ""
                systemId = element.get("systemId") or ""
                rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
                          (element.text, publicId, systemId))
            else:
                rv.append("<!DOCTYPE %s>" % (element.text,))
        elif element.tag == "DOCUMENT_ROOT":
            if element.text is not None:
                rv.append(element.text)
            if element.tail is not None:
                raise TypeError("Document node cannot have tail")
            if hasattr(element, "attrib") and len(element.attrib):
                raise TypeError("Document node cannot have attributes")

            for child in element:
                serializeElement(child)

        elif element.tag == ElementTreeCommentType:
            rv.append("<!--%s-->" % (element.text,))
        else:
            # This is assumed to be an ordinary element.  Convert the tag
            # once and reuse it, so the start tag (with or without
            # attributes) and the end tag always carry the same name.
            tag = infoset_filter.fromXmlName(element.tag)
            if not element.attrib:
                rv.append("<%s>" % (tag,))
            else:
                attr = " ".join(["%s=\"%s\"" % (
                    infoset_filter.fromXmlName(name), value)
                    for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (tag, attr))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (tag,))

        # Trailing text applies to every node kind.
        if element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)

315 

class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
    """Tree builder that produces ElementTree nodes via the wrapper classes."""

    # Node classes the base builder instantiates.
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = Element
    commentClass = Comment
    fragmentClass = DocumentFragment
    implementation = ElementTreeImplementation

    def testSerializer(self, element):
        """Delegate to the enclosing factory's ``testSerializer`` function."""
        return testSerializer(element)

    def getDocument(self):
        """Return the built tree as a raw ElementTree element.

        With ``fullTree`` the document root element is returned; otherwise
        the result of looking up its ``html`` child (qualified by the
        default namespace when one is set).
        """
        if fullTree:
            return self.document._element
        if self.defaultNamespace is None:
            html_tag = "html"
        else:
            html_tag = "{%s}html" % self.defaultNamespace
        return self.document._element.find(html_tag)

    def getFragment(self):
        """Return the raw element behind the base builder's fragment."""
        fragment = base.TreeBuilder.getFragment(self)
        return fragment._element

339 

340 return locals() 

341 

342 

# Public entry point: getETreeBuilder wrapped by moduleFactoryFactory.
# NOTE(review): presumably this turns the dict returned by the factory into
# a module-like object per ElementTree implementation -- confirm in .._utils.
getETreeModule = moduleFactoryFactory(
    getETreeBuilder
)