Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/html5lib/treebuilders/base.py: 53%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

201 statements  

1from __future__ import absolute_import, division, unicode_literals 

2from six import text_type 

3 

4from ..constants import scopingElements, tableInsertModeElements, namespaces 

5 

6# The scope markers are inserted when entering object elements, 

7# marquees, table cells, and table captions, and are used to prevent formatting 

8# from "leaking" into tables, object elements, and marquees. 

9Marker = None 

10 

11listElementsMap = { 

12 None: (frozenset(scopingElements), False), 

13 "button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False), 

14 "list": (frozenset(scopingElements | {(namespaces["html"], "ol"), 

15 (namespaces["html"], "ul")}), False), 

16 "table": (frozenset([(namespaces["html"], "html"), 

17 (namespaces["html"], "table")]), False), 

18 "select": (frozenset([(namespaces["html"], "optgroup"), 

19 (namespaces["html"], "option")]), True) 

20} 

21 

22 

23class Node(object): 

24 """Represents an item in the tree""" 

25 def __init__(self, name): 

26 """Creates a Node 

27 

28 :arg name: The tag name associated with the node 

29 

30 """ 

31 # The tag name associated with the node 

32 self.name = name 

33 # The parent of the current node (or None for the document node) 

34 self.parent = None 

35 # The value of the current node (applies to text nodes and comments) 

36 self.value = None 

37 # A dict holding name -> value pairs for attributes of the node 

38 self.attributes = {} 

39 # A list of child nodes of the current node. This must include all 

40 # elements but not necessarily other node types. 

41 self.childNodes = [] 

42 # A list of miscellaneous flags that can be set on the node. 

43 self._flags = [] 

44 

45 def __str__(self): 

46 attributesStr = " ".join(["%s=\"%s\"" % (name, value) 

47 for name, value in 

48 self.attributes.items()]) 

49 if attributesStr: 

50 return "<%s %s>" % (self.name, attributesStr) 

51 else: 

52 return "<%s>" % (self.name) 

53 

54 def __repr__(self): 

55 return "<%s>" % (self.name) 

56 

57 def appendChild(self, node): 

58 """Insert node as a child of the current node 

59 

60 :arg node: the node to insert 

61 

62 """ 

63 raise NotImplementedError 

64 

65 def insertText(self, data, insertBefore=None): 

66 """Insert data as text in the current node, positioned before the 

67 start of node insertBefore or to the end of the node's text. 

68 

69 :arg data: the data to insert 

70 

71 :arg insertBefore: True if you want to insert the text before the node 

72 and False if you want to insert it after the node 

73 

74 """ 

75 raise NotImplementedError 

76 

77 def insertBefore(self, node, refNode): 

78 """Insert node as a child of the current node, before refNode in the 

79 list of child nodes. Raises ValueError if refNode is not a child of 

80 the current node 

81 

82 :arg node: the node to insert 

83 

84 :arg refNode: the child node to insert the node before 

85 

86 """ 

87 raise NotImplementedError 

88 

89 def removeChild(self, node): 

90 """Remove node from the children of the current node 

91 

92 :arg node: the child node to remove 

93 

94 """ 

95 raise NotImplementedError 

96 

97 def reparentChildren(self, newParent): 

98 """Move all the children of the current node to newParent. 

99 This is needed so that trees that don't store text as nodes move the 

100 text in the correct way 

101 

102 :arg newParent: the node to move all this node's children to 

103 

104 """ 

105 # XXX - should this method be made more general? 

106 for child in self.childNodes: 

107 newParent.appendChild(child) 

108 self.childNodes = [] 

109 

110 def cloneNode(self): 

111 """Return a shallow copy of the current node i.e. a node with the same 

112 name and attributes but with no parent or child nodes 

113 """ 

114 raise NotImplementedError 

115 

116 def hasContent(self): 

117 """Return true if the node has children or text, false otherwise 

118 """ 

119 raise NotImplementedError 

120 

121 

122class ActiveFormattingElements(list): 

123 def append(self, node): 

124 equalCount = 0 

125 if node != Marker: 

126 for element in self[::-1]: 

127 if element == Marker: 

128 break 

129 if self.nodesEqual(element, node): 

130 equalCount += 1 

131 if equalCount == 3: 

132 self.remove(element) 

133 break 

134 list.append(self, node) 

135 

136 def nodesEqual(self, node1, node2): 

137 if not node1.nameTuple == node2.nameTuple: 

138 return False 

139 

140 if not node1.attributes == node2.attributes: 

141 return False 

142 

143 return True 

144 

145 

146class TreeBuilder(object): 

147 """Base treebuilder implementation 

148 

149 * documentClass - the class to use for the bottommost node of a document 

150 * elementClass - the class to use for HTML Elements 

151 * commentClass - the class to use for comments 

152 * doctypeClass - the class to use for doctypes 

153 

154 """ 

155 # pylint:disable=not-callable 

156 

157 # Document class 

158 documentClass = None 

159 

160 # The class to use for creating a node 

161 elementClass = None 

162 

163 # The class to use for creating comments 

164 commentClass = None 

165 

166 # The class to use for creating doctypes 

167 doctypeClass = None 

168 

169 # Fragment class 

170 fragmentClass = None 

171 

172 def __init__(self, namespaceHTMLElements): 

173 """Create a TreeBuilder 

174 

175 :arg namespaceHTMLElements: whether or not to namespace HTML elements 

176 

177 """ 

178 if namespaceHTMLElements: 

179 self.defaultNamespace = "http://www.w3.org/1999/xhtml" 

180 else: 

181 self.defaultNamespace = None 

182 self.reset() 

183 

184 def reset(self): 

185 self.openElements = [] 

186 self.activeFormattingElements = ActiveFormattingElements() 

187 

188 # XXX - rename these to headElement, formElement 

189 self.headPointer = None 

190 self.formPointer = None 

191 

192 self.insertFromTable = False 

193 

194 self.document = self.documentClass() 

195 

196 def elementInScope(self, target, variant=None): 

197 

198 # If we pass a node in we match that. if we pass a string 

199 # match any node with that name 

200 exactNode = hasattr(target, "nameTuple") 

201 if not exactNode: 

202 if isinstance(target, text_type): 

203 target = (namespaces["html"], target) 

204 assert isinstance(target, tuple) 

205 

206 listElements, invert = listElementsMap[variant] 

207 

208 for node in reversed(self.openElements): 

209 if exactNode and node == target: 

210 return True 

211 elif not exactNode and node.nameTuple == target: 

212 return True 

213 elif (invert ^ (node.nameTuple in listElements)): 

214 return False 

215 

216 assert False # We should never reach this point 

217 

218 def reconstructActiveFormattingElements(self): 

219 # Within this algorithm the order of steps described in the 

220 # specification is not quite the same as the order of steps in the 

221 # code. It should still do the same though. 

222 

223 # Step 1: stop the algorithm when there's nothing to do. 

224 if not self.activeFormattingElements: 

225 return 

226 

227 # Step 2 and step 3: we start with the last element. So i is -1. 

228 i = len(self.activeFormattingElements) - 1 

229 entry = self.activeFormattingElements[i] 

230 if entry == Marker or entry in self.openElements: 

231 return 

232 

233 # Step 6 

234 while entry != Marker and entry not in self.openElements: 

235 if i == 0: 

236 # This will be reset to 0 below 

237 i = -1 

238 break 

239 i -= 1 

240 # Step 5: let entry be one earlier in the list. 

241 entry = self.activeFormattingElements[i] 

242 

243 while True: 

244 # Step 7 

245 i += 1 

246 

247 # Step 8 

248 entry = self.activeFormattingElements[i] 

249 clone = entry.cloneNode() # Mainly to get a new copy of the attributes 

250 

251 # Step 9 

252 element = self.insertElement({"type": "StartTag", 

253 "name": clone.name, 

254 "namespace": clone.namespace, 

255 "data": clone.attributes}) 

256 

257 # Step 10 

258 self.activeFormattingElements[i] = element 

259 

260 # Step 11 

261 if element == self.activeFormattingElements[-1]: 

262 break 

263 

264 def clearActiveFormattingElements(self): 

265 entry = self.activeFormattingElements.pop() 

266 while self.activeFormattingElements and entry != Marker: 

267 entry = self.activeFormattingElements.pop() 

268 

269 def elementInActiveFormattingElements(self, name): 

270 """Check if an element exists between the end of the active 

271 formatting elements and the last marker. If it does, return it, else 

272 return false""" 

273 

274 for item in self.activeFormattingElements[::-1]: 

275 # Check for Marker first because if it's a Marker it doesn't have a 

276 # name attribute. 

277 if item == Marker: 

278 break 

279 elif item.name == name: 

280 return item 

281 return False 

282 

283 def insertRoot(self, token): 

284 element = self.createElement(token) 

285 self.openElements.append(element) 

286 self.document.appendChild(element) 

287 

288 def insertDoctype(self, token): 

289 name = token["name"] 

290 publicId = token["publicId"] 

291 systemId = token["systemId"] 

292 

293 doctype = self.doctypeClass(name, publicId, systemId) 

294 self.document.appendChild(doctype) 

295 

296 def insertComment(self, token, parent=None): 

297 if parent is None: 

298 parent = self.openElements[-1] 

299 parent.appendChild(self.commentClass(token["data"])) 

300 

301 def createElement(self, token): 

302 """Create an element but don't insert it anywhere""" 

303 name = token["name"] 

304 namespace = token.get("namespace", self.defaultNamespace) 

305 element = self.elementClass(name, namespace) 

306 element.attributes = token["data"] 

307 return element 

308 

309 def _getInsertFromTable(self): 

310 return self._insertFromTable 

311 

312 def _setInsertFromTable(self, value): 

313 """Switch the function used to insert an element from the 

314 normal one to the misnested table one and back again""" 

315 self._insertFromTable = value 

316 if value: 

317 self.insertElement = self.insertElementTable 

318 else: 

319 self.insertElement = self.insertElementNormal 

320 

321 insertFromTable = property(_getInsertFromTable, _setInsertFromTable) 

322 

323 def insertElementNormal(self, token): 

324 name = token["name"] 

325 assert isinstance(name, text_type), "Element %s not unicode" % name 

326 namespace = token.get("namespace", self.defaultNamespace) 

327 element = self.elementClass(name, namespace) 

328 element.attributes = token["data"] 

329 self.openElements[-1].appendChild(element) 

330 self.openElements.append(element) 

331 return element 

332 

333 def insertElementTable(self, token): 

334 """Create an element and insert it into the tree""" 

335 element = self.createElement(token) 

336 if self.openElements[-1].name not in tableInsertModeElements: 

337 return self.insertElementNormal(token) 

338 else: 

339 # We should be in the InTable mode. This means we want to do 

340 # special magic element rearranging 

341 parent, insertBefore = self.getTableMisnestedNodePosition() 

342 if insertBefore is None: 

343 parent.appendChild(element) 

344 else: 

345 parent.insertBefore(element, insertBefore) 

346 self.openElements.append(element) 

347 return element 

348 

349 def insertText(self, data, parent=None): 

350 """Insert text data.""" 

351 if parent is None: 

352 parent = self.openElements[-1] 

353 

354 if (not self.insertFromTable or (self.insertFromTable and 

355 self.openElements[-1].name 

356 not in tableInsertModeElements)): 

357 parent.insertText(data) 

358 else: 

359 # We should be in the InTable mode. This means we want to do 

360 # special magic element rearranging 

361 parent, insertBefore = self.getTableMisnestedNodePosition() 

362 parent.insertText(data, insertBefore) 

363 

364 def getTableMisnestedNodePosition(self): 

365 """Get the foster parent element, and sibling to insert before 

366 (or None) when inserting a misnested table node""" 

367 # The foster parent element is the one which comes before the most 

368 # recently opened table element 

369 # XXX - this is really inelegant 

370 lastTable = None 

371 fosterParent = None 

372 insertBefore = None 

373 for elm in self.openElements[::-1]: 

374 if elm.name == "table": 

375 lastTable = elm 

376 break 

377 if lastTable: 

378 # XXX - we should really check that this parent is actually a 

379 # node here 

380 if lastTable.parent: 

381 fosterParent = lastTable.parent 

382 insertBefore = lastTable 

383 else: 

384 fosterParent = self.openElements[ 

385 self.openElements.index(lastTable) - 1] 

386 else: 

387 fosterParent = self.openElements[0] 

388 return fosterParent, insertBefore 

389 

390 def generateImpliedEndTags(self, exclude=None): 

391 name = self.openElements[-1].name 

392 # XXX td, th and tr are not actually needed 

393 if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and 

394 name != exclude): 

395 self.openElements.pop() 

396 # XXX This is not entirely what the specification says. We should 

397 # investigate it more closely. 

398 self.generateImpliedEndTags(exclude) 

399 

400 def getDocument(self): 

401 """Return the final tree""" 

402 return self.document 

403 

404 def getFragment(self): 

405 """Return the final fragment""" 

406 # assert self.innerHTML 

407 fragment = self.fragmentClass() 

408 self.openElements[0].reparentChildren(fragment) 

409 return fragment 

410 

411 def testSerializer(self, node): 

412 """Serialize the subtree of node in the format required by unit tests 

413 

414 :arg node: the node from which to start serializing 

415 

416 """ 

417 raise NotImplementedError