1from __future__ import absolute_import, division, unicode_literals
2from six import text_type
3
4from ..constants import scopingElements, tableInsertModeElements, namespaces
5
6# The scope markers are inserted when entering object elements,
7# marquees, table cells, and table captions, and are used to prevent formatting
8# from "leaking" into tables, object elements, and marquees.
9Marker = None
10
11listElementsMap = {
12 None: (frozenset(scopingElements), False),
13 "button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False),
14 "list": (frozenset(scopingElements | {(namespaces["html"], "ol"),
15 (namespaces["html"], "ul")}), False),
16 "table": (frozenset([(namespaces["html"], "html"),
17 (namespaces["html"], "table")]), False),
18 "select": (frozenset([(namespaces["html"], "optgroup"),
19 (namespaces["html"], "option")]), True)
20}
21
22
23class Node(object):
24 """Represents an item in the tree"""
25 def __init__(self, name):
26 """Creates a Node
27
28 :arg name: The tag name associated with the node
29
30 """
31 # The tag name associated with the node
32 self.name = name
33 # The parent of the current node (or None for the document node)
34 self.parent = None
35 # The value of the current node (applies to text nodes and comments)
36 self.value = None
37 # A dict holding name -> value pairs for attributes of the node
38 self.attributes = {}
39 # A list of child nodes of the current node. This must include all
40 # elements but not necessarily other node types.
41 self.childNodes = []
42 # A list of miscellaneous flags that can be set on the node.
43 self._flags = []
44
45 def __str__(self):
46 attributesStr = " ".join(["%s=\"%s\"" % (name, value)
47 for name, value in
48 self.attributes.items()])
49 if attributesStr:
50 return "<%s %s>" % (self.name, attributesStr)
51 else:
52 return "<%s>" % (self.name)
53
54 def __repr__(self):
55 return "<%s>" % (self.name)
56
57 def appendChild(self, node):
58 """Insert node as a child of the current node
59
60 :arg node: the node to insert
61
62 """
63 raise NotImplementedError
64
65 def insertText(self, data, insertBefore=None):
66 """Insert data as text in the current node, positioned before the
67 start of node insertBefore or to the end of the node's text.
68
69 :arg data: the data to insert
70
71 :arg insertBefore: True if you want to insert the text before the node
72 and False if you want to insert it after the node
73
74 """
75 raise NotImplementedError
76
77 def insertBefore(self, node, refNode):
78 """Insert node as a child of the current node, before refNode in the
79 list of child nodes. Raises ValueError if refNode is not a child of
80 the current node
81
82 :arg node: the node to insert
83
84 :arg refNode: the child node to insert the node before
85
86 """
87 raise NotImplementedError
88
89 def removeChild(self, node):
90 """Remove node from the children of the current node
91
92 :arg node: the child node to remove
93
94 """
95 raise NotImplementedError
96
97 def reparentChildren(self, newParent):
98 """Move all the children of the current node to newParent.
99 This is needed so that trees that don't store text as nodes move the
100 text in the correct way
101
102 :arg newParent: the node to move all this node's children to
103
104 """
105 # XXX - should this method be made more general?
106 for child in self.childNodes:
107 newParent.appendChild(child)
108 self.childNodes = []
109
110 def cloneNode(self):
111 """Return a shallow copy of the current node i.e. a node with the same
112 name and attributes but with no parent or child nodes
113 """
114 raise NotImplementedError
115
116 def hasContent(self):
117 """Return true if the node has children or text, false otherwise
118 """
119 raise NotImplementedError
120
121
122class ActiveFormattingElements(list):
123 def append(self, node):
124 equalCount = 0
125 if node != Marker:
126 for element in self[::-1]:
127 if element == Marker:
128 break
129 if self.nodesEqual(element, node):
130 equalCount += 1
131 if equalCount == 3:
132 self.remove(element)
133 break
134 list.append(self, node)
135
136 def nodesEqual(self, node1, node2):
137 if not node1.nameTuple == node2.nameTuple:
138 return False
139
140 if not node1.attributes == node2.attributes:
141 return False
142
143 return True
144
145
146class TreeBuilder(object):
147 """Base treebuilder implementation
148
149 * documentClass - the class to use for the bottommost node of a document
150 * elementClass - the class to use for HTML Elements
151 * commentClass - the class to use for comments
152 * doctypeClass - the class to use for doctypes
153
154 """
155 # pylint:disable=not-callable
156
157 # Document class
158 documentClass = None
159
160 # The class to use for creating a node
161 elementClass = None
162
163 # The class to use for creating comments
164 commentClass = None
165
166 # The class to use for creating doctypes
167 doctypeClass = None
168
169 # Fragment class
170 fragmentClass = None
171
172 def __init__(self, namespaceHTMLElements):
173 """Create a TreeBuilder
174
175 :arg namespaceHTMLElements: whether or not to namespace HTML elements
176
177 """
178 if namespaceHTMLElements:
179 self.defaultNamespace = "http://www.w3.org/1999/xhtml"
180 else:
181 self.defaultNamespace = None
182 self.reset()
183
184 def reset(self):
185 self.openElements = []
186 self.activeFormattingElements = ActiveFormattingElements()
187
188 # XXX - rename these to headElement, formElement
189 self.headPointer = None
190 self.formPointer = None
191
192 self.insertFromTable = False
193
194 self.document = self.documentClass()
195
196 def elementInScope(self, target, variant=None):
197
198 # If we pass a node in we match that. if we pass a string
199 # match any node with that name
200 exactNode = hasattr(target, "nameTuple")
201 if not exactNode:
202 if isinstance(target, text_type):
203 target = (namespaces["html"], target)
204 assert isinstance(target, tuple)
205
206 listElements, invert = listElementsMap[variant]
207
208 for node in reversed(self.openElements):
209 if exactNode and node == target:
210 return True
211 elif not exactNode and node.nameTuple == target:
212 return True
213 elif (invert ^ (node.nameTuple in listElements)):
214 return False
215
216 assert False # We should never reach this point
217
218 def reconstructActiveFormattingElements(self):
219 # Within this algorithm the order of steps described in the
220 # specification is not quite the same as the order of steps in the
221 # code. It should still do the same though.
222
223 # Step 1: stop the algorithm when there's nothing to do.
224 if not self.activeFormattingElements:
225 return
226
227 # Step 2 and step 3: we start with the last element. So i is -1.
228 i = len(self.activeFormattingElements) - 1
229 entry = self.activeFormattingElements[i]
230 if entry == Marker or entry in self.openElements:
231 return
232
233 # Step 6
234 while entry != Marker and entry not in self.openElements:
235 if i == 0:
236 # This will be reset to 0 below
237 i = -1
238 break
239 i -= 1
240 # Step 5: let entry be one earlier in the list.
241 entry = self.activeFormattingElements[i]
242
243 while True:
244 # Step 7
245 i += 1
246
247 # Step 8
248 entry = self.activeFormattingElements[i]
249 clone = entry.cloneNode() # Mainly to get a new copy of the attributes
250
251 # Step 9
252 element = self.insertElement({"type": "StartTag",
253 "name": clone.name,
254 "namespace": clone.namespace,
255 "data": clone.attributes})
256
257 # Step 10
258 self.activeFormattingElements[i] = element
259
260 # Step 11
261 if element == self.activeFormattingElements[-1]:
262 break
263
264 def clearActiveFormattingElements(self):
265 entry = self.activeFormattingElements.pop()
266 while self.activeFormattingElements and entry != Marker:
267 entry = self.activeFormattingElements.pop()
268
269 def elementInActiveFormattingElements(self, name):
270 """Check if an element exists between the end of the active
271 formatting elements and the last marker. If it does, return it, else
272 return false"""
273
274 for item in self.activeFormattingElements[::-1]:
275 # Check for Marker first because if it's a Marker it doesn't have a
276 # name attribute.
277 if item == Marker:
278 break
279 elif item.name == name:
280 return item
281 return False
282
283 def insertRoot(self, token):
284 element = self.createElement(token)
285 self.openElements.append(element)
286 self.document.appendChild(element)
287
288 def insertDoctype(self, token):
289 name = token["name"]
290 publicId = token["publicId"]
291 systemId = token["systemId"]
292
293 doctype = self.doctypeClass(name, publicId, systemId)
294 self.document.appendChild(doctype)
295
296 def insertComment(self, token, parent=None):
297 if parent is None:
298 parent = self.openElements[-1]
299 parent.appendChild(self.commentClass(token["data"]))
300
301 def createElement(self, token):
302 """Create an element but don't insert it anywhere"""
303 name = token["name"]
304 namespace = token.get("namespace", self.defaultNamespace)
305 element = self.elementClass(name, namespace)
306 element.attributes = token["data"]
307 return element
308
309 def _getInsertFromTable(self):
310 return self._insertFromTable
311
312 def _setInsertFromTable(self, value):
313 """Switch the function used to insert an element from the
314 normal one to the misnested table one and back again"""
315 self._insertFromTable = value
316 if value:
317 self.insertElement = self.insertElementTable
318 else:
319 self.insertElement = self.insertElementNormal
320
321 insertFromTable = property(_getInsertFromTable, _setInsertFromTable)
322
323 def insertElementNormal(self, token):
324 name = token["name"]
325 assert isinstance(name, text_type), "Element %s not unicode" % name
326 namespace = token.get("namespace", self.defaultNamespace)
327 element = self.elementClass(name, namespace)
328 element.attributes = token["data"]
329 self.openElements[-1].appendChild(element)
330 self.openElements.append(element)
331 return element
332
333 def insertElementTable(self, token):
334 """Create an element and insert it into the tree"""
335 element = self.createElement(token)
336 if self.openElements[-1].name not in tableInsertModeElements:
337 return self.insertElementNormal(token)
338 else:
339 # We should be in the InTable mode. This means we want to do
340 # special magic element rearranging
341 parent, insertBefore = self.getTableMisnestedNodePosition()
342 if insertBefore is None:
343 parent.appendChild(element)
344 else:
345 parent.insertBefore(element, insertBefore)
346 self.openElements.append(element)
347 return element
348
349 def insertText(self, data, parent=None):
350 """Insert text data."""
351 if parent is None:
352 parent = self.openElements[-1]
353
354 if (not self.insertFromTable or (self.insertFromTable and
355 self.openElements[-1].name
356 not in tableInsertModeElements)):
357 parent.insertText(data)
358 else:
359 # We should be in the InTable mode. This means we want to do
360 # special magic element rearranging
361 parent, insertBefore = self.getTableMisnestedNodePosition()
362 parent.insertText(data, insertBefore)
363
364 def getTableMisnestedNodePosition(self):
365 """Get the foster parent element, and sibling to insert before
366 (or None) when inserting a misnested table node"""
367 # The foster parent element is the one which comes before the most
368 # recently opened table element
369 # XXX - this is really inelegant
370 lastTable = None
371 fosterParent = None
372 insertBefore = None
373 for elm in self.openElements[::-1]:
374 if elm.name == "table":
375 lastTable = elm
376 break
377 if lastTable:
378 # XXX - we should really check that this parent is actually a
379 # node here
380 if lastTable.parent:
381 fosterParent = lastTable.parent
382 insertBefore = lastTable
383 else:
384 fosterParent = self.openElements[
385 self.openElements.index(lastTable) - 1]
386 else:
387 fosterParent = self.openElements[0]
388 return fosterParent, insertBefore
389
390 def generateImpliedEndTags(self, exclude=None):
391 name = self.openElements[-1].name
392 # XXX td, th and tr are not actually needed
393 if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and
394 name != exclude):
395 self.openElements.pop()
396 # XXX This is not entirely what the specification says. We should
397 # investigate it more closely.
398 self.generateImpliedEndTags(exclude)
399
400 def getDocument(self):
401 """Return the final tree"""
402 return self.document
403
404 def getFragment(self):
405 """Return the final fragment"""
406 # assert self.innerHTML
407 fragment = self.fragmentClass()
408 self.openElements[0].reparentChildren(fragment)
409 return fragment
410
411 def testSerializer(self, node):
412 """Serialize the subtree of node in the format required by unit tests
413
414 :arg node: the node from which to start serializing
415
416 """
417 raise NotImplementedError