1from __future__ import absolute_import, division, unicode_literals
2from six import text_type
3
4from ..constants import scopingElements, tableInsertModeElements, namespaces
5
# Sentinel placed in the list of active formatting elements when entering
# object elements, marquees, table cells, and table captions. It is used to
# prevent formatting from "leaking" into tables, object elements, and
# marquees.
Marker = None

# Maps a scope variant name (as passed to TreeBuilder.elementInScope) to a
# pair ``(element name tuples, invert)``. The name tuples are
# ``(namespace, tag name)`` pairs; ``invert`` flips the membership test that
# elementInScope performs against the set.
listElementsMap = {
    None: (frozenset(scopingElements), False),
    "button": (
        frozenset(scopingElements).union([(namespaces["html"], "button")]),
        False,
    ),
    "list": (
        frozenset(scopingElements).union([(namespaces["html"], "ol"),
                                          (namespaces["html"], "ul")]),
        False,
    ),
    "table": (
        frozenset({(namespaces["html"], "html"),
                   (namespaces["html"], "table")}),
        False,
    ),
    "select": (
        frozenset({(namespaces["html"], "optgroup"),
                   (namespaces["html"], "option")}),
        True,
    ),
}
21
22
class Node(object):
    """Represents an item in the tree

    This base class only stores the common attributes and implements
    ``reparentChildren``; every other tree operation raises
    ``NotImplementedError`` and has to be provided by a subclass.
    """

    def __init__(self, name):
        """Creates a Node

        :arg name: The tag name associated with the node

        """
        # The tag name associated with the node
        self.name = name
        # The parent of the current node (or None for the document node)
        self.parent = None
        # The value of the current node (applies to text nodes and comments)
        self.value = None
        # A dict holding name -> value pairs for attributes of the node
        self.attributes = {}
        # A list of child nodes of the current node. This must include all
        # elements but not necessarily other node types.
        self.childNodes = []
        # A list of miscellaneous flags that can be set on the node.
        self._flags = []

    def __str__(self):
        """Return ``<name>`` or ``<name attr="value" ...>`` for the node"""
        attributesStr = " ".join("%s=\"%s\"" % (name, value)
                                 for name, value in self.attributes.items())
        if attributesStr:
            return "<%s %s>" % (self.name, attributesStr)
        return "<%s>" % (self.name)

    def __repr__(self):
        """Return ``<name>``; attributes are not included"""
        return "<%s>" % (self.name)

    def appendChild(self, node):
        """Insert node as a child of the current node

        :arg node: the node to insert

        """
        raise NotImplementedError

    def insertText(self, data, insertBefore=None):
        """Insert data as text in the current node, positioned before the
        start of node insertBefore or to the end of the node's text.

        :arg data: the data to insert

        :arg insertBefore: the child node the text should be placed in front
            of, or None to add the text at the end of the node's text

        """
        raise NotImplementedError

    def insertBefore(self, node, refNode):
        """Insert node as a child of the current node, before refNode in the
        list of child nodes. Raises ValueError if refNode is not a child of
        the current node

        :arg node: the node to insert

        :arg refNode: the child node to insert the node before

        """
        raise NotImplementedError

    def removeChild(self, node):
        """Remove node from the children of the current node

        :arg node: the child node to remove

        """
        raise NotImplementedError

    def reparentChildren(self, newParent):
        """Move all the children of the current node to newParent.
        This is needed so that trees that don't store text as nodes move the
        text in the correct way

        :arg newParent: the node to move all this node's children to

        """
        # XXX - should this method be made more general?
        # Iterate over a snapshot: an appendChild implementation may detach
        # the child from this node's childNodes, and mutating the list while
        # iterating over it would silently skip every other child.
        for child in list(self.childNodes):
            newParent.appendChild(child)
        self.childNodes = []

    def cloneNode(self):
        """Return a shallow copy of the current node i.e. a node with the same
        name and attributes but with no parent or child nodes
        """
        raise NotImplementedError

    def hasContent(self):
        """Return true if the node has children or text, false otherwise
        """
        raise NotImplementedError
120
121
class ActiveFormattingElements(list):
    """List of active formatting elements.

    Entries are either nodes or ``Marker``. ``append`` keeps at most three
    equal nodes (as judged by ``nodesEqual``) after the last ``Marker``.
    """

    def append(self, node):
        """Append node to the end of the list.

        Unless node is the Marker, the entries after the last Marker are
        scanned from the end; when a third entry equal to node is found, that
        entry is removed before node is appended.
        """
        if node != Marker:
            matches = 0
            for candidate in reversed(self):
                if candidate == Marker:
                    # Never look past the most recent marker
                    break
                if not self.nodesEqual(candidate, node):
                    continue
                matches += 1
                if matches == 3:
                    # Stop right after mutating the list being iterated
                    self.remove(candidate)
                    break
        list.append(self, node)

    def nodesEqual(self, node1, node2):
        """Return True if both nodes have equal ``nameTuple`` and equal
        ``attributes``, False otherwise
        """
        sameName = node1.nameTuple == node2.nameTuple
        if sameName and node1.attributes == node2.attributes:
            return True
        return False
145
146
class TreeBuilder(object):
    """Base treebuilder implementation

    The class attributes below are ``None`` here and are called as
    constructors by the builder, so they have to be set to usable classes
    before the corresponding method is used (``documentClass`` is needed as
    soon as the builder is constructed, because ``reset`` instantiates it):

    * documentClass - the class to use for the bottommost node of a document
    * elementClass - the class to use for HTML Elements
    * commentClass - the class to use for comments
    * doctypeClass - the class to use for doctypes
    * fragmentClass - the class instantiated by ``getFragment``

    """
    # pylint:disable=not-callable

    # Document class
    documentClass = None

    # The class to use for creating a node
    elementClass = None

    # The class to use for creating comments
    commentClass = None

    # The class to use for creating doctypes
    doctypeClass = None

    # Fragment class
    fragmentClass = None

    # Names of the elements that generateImpliedEndTags pops off the stack.
    # Built once here instead of on every call.
    # XXX td, th and tr are not actually needed
    _impliedEndTagNames = frozenset(("dd", "dt", "li", "option", "optgroup",
                                     "p", "rp", "rt"))

    def __init__(self, namespaceHTMLElements):
        """Create a TreeBuilder

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        """
        if namespaceHTMLElements:
            self.defaultNamespace = "http://www.w3.org/1999/xhtml"
        else:
            self.defaultNamespace = None
        self.reset()

    def reset(self):
        """Reset all builder state and create a fresh, empty document"""
        self.openElements = []
        self.activeFormattingElements = ActiveFormattingElements()

        # XXX - rename these to headElement, formElement
        self.headPointer = None
        self.formPointer = None

        # Assigning through the property also binds self.insertElement
        self.insertFromTable = False

        self.document = self.documentClass()

    def elementInScope(self, target, variant=None):
        """Check whether target is in scope on the stack of open elements

        The stack is walked from the most recently opened element backwards.
        The walk returns True at the first match and False at the first
        element that terminates the scope for the given variant.

        :arg target: a node (anything with a ``nameTuple`` attribute), which
            is matched with ``==`` against the open elements; a text string,
            which is taken as a tag name in the HTML namespace; or a
            ``(namespace, name)`` tuple

        :arg variant: key into ``listElementsMap`` selecting the scope variant

        :returns: True if target was found before the scope ended, False
            otherwise

        """
        # If we pass a node in we match that. if we pass a string
        # match any node with that name
        exactNode = hasattr(target, "nameTuple")
        if not exactNode:
            if isinstance(target, text_type):
                target = (namespaces["html"], target)
            assert isinstance(target, tuple)

        listElements, invert = listElementsMap[variant]

        for node in reversed(self.openElements):
            if exactNode and node == target:
                return True
            elif not exactNode and node.nameTuple == target:
                return True
            elif (invert ^ (node.nameTuple in listElements)):
                # invert flips the membership test, so for an inverted
                # variant every element *not* in the set ends the scope
                return False

        assert False  # We should never reach this point

    def reconstructActiveFormattingElements(self):
        """Re-insert the active formatting elements that are not currently
        open, replacing each list entry with the newly inserted element
        """
        # Within this algorithm the order of steps described in the
        # specification is not quite the same as the order of steps in the
        # code. It should still do the same though.

        # Step 1: stop the algorithm when there's nothing to do.
        if not self.activeFormattingElements:
            return

        # Step 2 and step 3: we start with the last element, i.e. the entry
        # at index len - 1.
        i = len(self.activeFormattingElements) - 1
        entry = self.activeFormattingElements[i]
        if entry == Marker or entry in self.openElements:
            return

        # Step 6: walk backwards until a marker or an open element is found
        while entry != Marker and entry not in self.openElements:
            if i == 0:
                # This will be reset to 0 below
                i = -1
                break
            i -= 1
            # Step 5: let entry be one earlier in the list.
            entry = self.activeFormattingElements[i]

        while True:
            # Step 7
            i += 1

            # Step 8
            entry = self.activeFormattingElements[i]
            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes

            # Step 9
            element = self.insertElement({"type": "StartTag",
                                          "name": clone.name,
                                          "namespace": clone.namespace,
                                          "data": clone.attributes})

            # Step 10
            self.activeFormattingElements[i] = element

            # Step 11
            if element == self.activeFormattingElements[-1]:
                break

    def clearActiveFormattingElements(self):
        """Pop entries off the list of active formatting elements until a
        Marker has been popped or the list is empty
        """
        entry = self.activeFormattingElements.pop()
        while self.activeFormattingElements and entry != Marker:
            entry = self.activeFormattingElements.pop()

    def elementInActiveFormattingElements(self, name):
        """Check if an element exists between the end of the active
        formatting elements and the last marker. If it does, return it, else
        return false"""

        for item in reversed(self.activeFormattingElements):
            # Check for Marker first because if it's a Marker it doesn't have a
            # name attribute.
            if item == Marker:
                break
            elif item.name == name:
                return item
        return False

    def insertRoot(self, token):
        """Create the element for token, push it on the stack of open
        elements and append it to the document
        """
        element = self.createElement(token)
        self.openElements.append(element)
        self.document.appendChild(element)

    def insertDoctype(self, token):
        """Append a doctype built from the token's ``name``, ``publicId`` and
        ``systemId`` to the document
        """
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        doctype = self.doctypeClass(name, publicId, systemId)
        self.document.appendChild(doctype)

    def insertComment(self, token, parent=None):
        """Append a comment holding ``token["data"]`` to parent

        :arg token: the comment token

        :arg parent: the node to append to; defaults to the last open element

        """
        if parent is None:
            parent = self.openElements[-1]
        parent.appendChild(self.commentClass(token["data"]))

    def createElement(self, token):
        """Create an element but don't insert it anywhere"""
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        return element

    def _getInsertFromTable(self):
        return self._insertFromTable

    def _setInsertFromTable(self, value):
        """Switch the function used to insert an element from the
        normal one to the misnested table one and back again"""
        self._insertFromTable = value
        if value:
            self.insertElement = self.insertElementTable
        else:
            self.insertElement = self.insertElementNormal

    insertFromTable = property(_getInsertFromTable, _setInsertFromTable)

    def insertElementNormal(self, token):
        """Create an element, append it to the last open element and push it
        on the stack of open elements

        :arg token: the start tag token

        :returns: the new element

        """
        name = token["name"]
        assert isinstance(name, text_type), "Element %s not unicode" % name
        # Same construction as every other insertion path
        element = self.createElement(token)
        self.openElements[-1].appendChild(element)
        self.openElements.append(element)
        return element

    def insertElementTable(self, token):
        """Create an element and insert it into the tree

        :arg token: the start tag token

        :returns: the new element

        """
        if self.openElements[-1].name not in tableInsertModeElements:
            # insertElementNormal builds its own element, so nothing is
            # created here that would only be thrown away
            return self.insertElementNormal(token)

        # We should be in the InTable mode. This means we want to do
        # special magic element rearranging
        element = self.createElement(token)
        parent, insertBefore = self.getTableMisnestedNodePosition()
        if insertBefore is None:
            parent.appendChild(element)
        else:
            parent.insertBefore(element, insertBefore)
        self.openElements.append(element)
        return element

    def insertText(self, data, parent=None):
        """Insert text data.

        :arg data: the text to insert

        :arg parent: the node to insert into; defaults to the last open
            element. It is ignored when the text has to be placed at the
            misnested table position instead.

        """
        if parent is None:
            parent = self.openElements[-1]

        if (not self.insertFromTable or
                self.openElements[-1].name not in tableInsertModeElements):
            parent.insertText(data)
        else:
            # We should be in the InTable mode. This means we want to do
            # special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            parent.insertText(data, insertBefore)

    def getTableMisnestedNodePosition(self):
        """Get the foster parent element, and sibling to insert before
        (or None) when inserting a misnested table node"""
        # The foster parent element is the one which comes before the most
        # recently opened table element
        # XXX - this is really inelegant
        lastTable = None
        fosterParent = None
        insertBefore = None
        for elm in reversed(self.openElements):
            if elm.name == "table":
                lastTable = elm
                break
        if lastTable:
            # XXX - we should really check that this parent is actually a
            # node here
            if lastTable.parent:
                fosterParent = lastTable.parent
                insertBefore = lastTable
            else:
                # No parent: use the element opened just before the table
                fosterParent = self.openElements[
                    self.openElements.index(lastTable) - 1]
        else:
            fosterParent = self.openElements[0]
        return fosterParent, insertBefore

    def generateImpliedEndTags(self, exclude=None):
        """Pop elements with implied end tags off the stack of open elements

        Keeps popping while the name of the last open element is one of
        ``_impliedEndTagNames`` and is different from ``exclude``.

        :arg exclude: a tag name that must not be popped, or None

        """
        # Iterative rather than recursive, so that a very deep run of such
        # elements cannot exhaust the interpreter's recursion limit.
        # XXX This is not entirely what the specification says. We should
        # investigate it more closely.
        impliedNames = self._impliedEndTagNames
        while True:
            name = self.openElements[-1].name
            if name not in impliedNames or name == exclude:
                return
            self.openElements.pop()

    def getDocument(self):
        """Return the final tree"""
        return self.document

    def getFragment(self):
        """Return the final fragment"""
        # assert self.innerHTML
        fragment = self.fragmentClass()
        self.openElements[0].reparentChildren(fragment)
        return fragment

    def testSerializer(self, node):
        """Serialize the subtree of node in the format required by unit tests

        :arg node: the node from which to start serializing

        """
        raise NotImplementedError