Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/html5lib/treebuilders/base.py: 85%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

202 statements  

1from __future__ import absolute_import, division, unicode_literals 

2from six import text_type 

3 

4from ..constants import scopingElements, tableInsertModeElements, namespaces 

5 

# The scope markers are inserted when entering object elements,
# marquees, table cells, and table captions, and are used to prevent formatting
# from "leaking" into tables, object elements, and marquees.
#
# The marker is the None singleton, so entries in the list of active
# formatting elements are either element nodes or None.
Marker = None

10 

# Maps the ``variant`` argument of TreeBuilder.elementInScope to a pair
# ``(elements, invert)``:
#
# * ``elements`` is a frozenset of (namespace, name) tuples that is tested
#   against each open element's ``nameTuple``;
# * ``invert`` flips that membership test.  With ``invert`` False the search
#   stops (element not in scope) at the first open element that IS in
#   ``elements``; with ``invert`` True it stops at the first open element that
#   is NOT in ``elements``.
listElementsMap = {
    None: (frozenset(scopingElements), False),
    "button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False),
    "list": (frozenset(scopingElements | {(namespaces["html"], "ol"),
                                          (namespaces["html"], "ul")}), False),
    "table": (frozenset([(namespaces["html"], "html"),
                         (namespaces["html"], "table")]), False),
    "select": (frozenset([(namespaces["html"], "optgroup"),
                          (namespaces["html"], "option")]), True)
}

21 

22 

class Node(object):
    """Represents an item in the tree

    This base class only stores the common attributes and implements
    ``__str__``, ``__repr__`` and ``reparentChildren``; every other tree
    operation raises NotImplementedError and must be supplied by a subclass.
    """

    def __init__(self, name):
        """Creates a Node

        :arg name: The tag name associated with the node

        """
        # The tag name associated with the node
        self.name = name
        # The parent of the current node (or None for the document node)
        self.parent = None
        # The value of the current node (applies to text nodes and comments)
        self.value = None
        # A dict holding name -> value pairs for attributes of the node
        self.attributes = {}
        # A list of child nodes of the current node. This must include all
        # elements but not necessarily other node types.
        self.childNodes = []
        # A list of miscellaneous flags that can be set on the node.
        self._flags = []

    def __str__(self):
        """Return the node as a start tag, e.g. ``<a href="x">`` or ``<a>``"""
        attributesStr = " ".join(["%s=\"%s\"" % (name, value)
                                  for name, value in
                                  self.attributes.items()])
        if attributesStr:
            return "<%s %s>" % (self.name, attributesStr)
        else:
            return "<%s>" % (self.name)

    def __repr__(self):
        """Return the node name in angle brackets, without attributes"""
        return "<%s>" % (self.name)

    def appendChild(self, node):
        """Insert node as a child of the current node

        :arg node: the node to insert

        """
        raise NotImplementedError

    def insertText(self, data, insertBefore=None):
        """Insert data as text in the current node, positioned before the
        start of node insertBefore or to the end of the node's text.

        :arg data: the data to insert

        :arg insertBefore: the child node the text should be placed in front
            of, or None (the default) to add the text at the end of the
            node's text

        """
        raise NotImplementedError

    def insertBefore(self, node, refNode):
        """Insert node as a child of the current node, before refNode in the
        list of child nodes. Raises ValueError if refNode is not a child of
        the current node

        :arg node: the node to insert

        :arg refNode: the child node to insert the node before

        """
        raise NotImplementedError

    def removeChild(self, node):
        """Remove node from the children of the current node

        :arg node: the child node to remove

        """
        raise NotImplementedError

    def reparentChildren(self, newParent):
        """Move all the children of the current node to newParent.
        This is needed so that trees that don't store text as nodes move the
        text in the correct way

        :arg newParent: the node to move all this node's children to

        """
        # XXX - should this method be made more general?
        # Walk a snapshot of the child list: an appendChild implementation
        # that detaches the child from its previous parent would otherwise
        # shrink the list being iterated, so every other child would be
        # skipped and then lost when childNodes is reset below.
        for child in list(self.childNodes):
            newParent.appendChild(child)
        self.childNodes = []

    def cloneNode(self):
        """Return a shallow copy of the current node i.e. a node with the same
        name and attributes but with no parent or child nodes
        """
        raise NotImplementedError

    def hasContent(self):
        """Return true if the node has children or text, false otherwise
        """
        raise NotImplementedError

120 

121 

class ActiveFormattingElements(list):
    """List of active formatting elements.

    Entries are element nodes or ``Marker``.  ``append`` is overridden so that
    at most three equal elements (see ``nodesEqual``) are kept after the last
    marker.
    """

    def append(self, node):
        """Append node to the end of the list.

        When node is not a marker, the entries after the last marker are
        scanned from the end of the list; on reaching the third entry that is
        equal to node, that entry is removed before node is appended.

        :arg node: the element (or ``Marker``) to append

        """
        # Marker is the None singleton, so it is tested by identity; this
        # never invokes an element's own __eq__ / __ne__.
        if node is not Marker:
            equalCount = 0
            # The only mutation (remove) is immediately followed by break, so
            # iterating the live list in reverse is safe and avoids a copy.
            for element in reversed(self):
                if element is Marker:
                    break
                if self.nodesEqual(element, node):
                    equalCount += 1
                if equalCount == 3:
                    self.remove(element)
                    break
        list.append(self, node)

    def nodesEqual(self, node1, node2):
        """Return True if both nodes have equal ``nameTuple`` and equal
        ``attributes``, False otherwise

        :arg node1: first node to compare

        :arg node2: second node to compare

        """
        sameName = node1.nameTuple == node2.nameTuple
        # Attributes are only compared when the names already match.
        return bool(sameName and node1.attributes == node2.attributes)

145 

146 

class TreeBuilder(object):
    """Base treebuilder implementation

    * documentClass - the class to use for the bottommost node of a document
    * elementClass - the class to use for HTML Elements
    * commentClass - the class to use for comments
    * doctypeClass - the class to use for doctypes
    * fragmentClass - the class to use for the node returned by getFragment

    All of these are None here and must be set by a subclass before the
    builder is used.

    """
    # pylint:disable=not-callable

    # Document class
    documentClass = None

    # The class to use for creating a node
    elementClass = None

    # The class to use for creating comments
    commentClass = None

    # The class to use for creating doctypes
    doctypeClass = None

    # Fragment class
    fragmentClass = None

    def __init__(self, namespaceHTMLElements):
        """Create a TreeBuilder

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        """
        if namespaceHTMLElements:
            self.defaultNamespace = "http://www.w3.org/1999/xhtml"
        else:
            self.defaultNamespace = None
        self.reset()

    def reset(self):
        """Discard all parsing state and start a new, empty document"""
        self.openElements = []
        self.activeFormattingElements = ActiveFormattingElements()

        # XXX - rename these to headElement, formElement
        self.headPointer = None
        self.formPointer = None

        # Assigning through the property also selects insertElementNormal as
        # the current insertElement implementation.
        self.insertFromTable = False

        self.document = self.documentClass()

    def elementInScope(self, target, variant=None):
        """Return True if target is in scope on the stack of open elements

        :arg target: a node (anything with a ``nameTuple`` attribute) to match
            exactly, a (namespace, name) tuple, or a text string which is
            taken to be a name in the HTML namespace

        :arg variant: key into ``listElementsMap`` selecting which elements
            terminate the search

        """
        # If we pass a node in we match that. if we pass a string
        # match any node with that name
        exactNode = hasattr(target, "nameTuple")
        if not exactNode:
            if isinstance(target, text_type):
                target = (namespaces["html"], target)
            assert isinstance(target, tuple)

        listElements, invert = listElementsMap[variant]

        for node in reversed(self.openElements):
            if exactNode and node == target:
                return True
            elif not exactNode and node.nameTuple == target:
                return True
            elif (invert ^ (node.nameTuple in listElements)):
                return False

        assert False  # We should never reach this point

    def reconstructActiveFormattingElements(self):
        """Re-insert clones of the trailing active formatting elements that
        are no longer on the stack of open elements, replacing the list
        entries with the newly inserted elements"""
        # Within this algorithm the order of steps described in the
        # specification is not quite the same as the order of steps in the
        # code. It should still do the same though.

        # Step 1: stop the algorithm when there's nothing to do.
        if not self.activeFormattingElements:
            return

        # Step 2 and step 3: we start with the last element. So i is -1.
        i = len(self.activeFormattingElements) - 1
        entry = self.activeFormattingElements[i]
        if entry is Marker or entry in self.openElements:
            return

        # Step 6
        while entry is not Marker and entry not in self.openElements:
            if i == 0:
                # This will be reset to 0 below
                i = -1
                break
            i -= 1
            # Step 5: let entry be one earlier in the list.
            entry = self.activeFormattingElements[i]

        while True:
            # Step 7
            i += 1

            # Step 8
            entry = self.activeFormattingElements[i]
            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes

            # Step 9
            element = self.insertElement({"type": "StartTag",
                                          "name": clone.name,
                                          "namespace": clone.namespace,
                                          "data": clone.attributes})

            # Step 10
            self.activeFormattingElements[i] = element

            # Step 11
            if element == self.activeFormattingElements[-1]:
                break

    def clearActiveFormattingElements(self):
        """Pop entries off the list of active formatting elements up to and
        including the last marker (or until the list is empty).

        Calling this with an empty list is a no-op.
        """
        entries = self.activeFormattingElements
        while entries:
            if entries.pop() is Marker:
                break

    def elementInActiveFormattingElements(self, name):
        """Check if an element exists between the end of the active
        formatting elements and the last marker. If it does, return it, else
        return false"""

        for item in reversed(self.activeFormattingElements):
            # Check for Marker first because if it's a Marker it doesn't have a
            # name attribute.
            if item is Marker:
                break
            elif item.name == name:
                return item
        return False

    def insertRoot(self, token):
        """Create the element for token, push it on the stack of open
        elements and append it to the document"""
        element = self.createElement(token)
        self.openElements.append(element)
        self.document.appendChild(element)

    def insertDoctype(self, token):
        """Create a doctype from the token's ``name``, ``publicId`` and
        ``systemId`` and append it to the document"""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        doctype = self.doctypeClass(name, publicId, systemId)
        self.document.appendChild(doctype)

    def insertComment(self, token, parent=None):
        """Append a comment built from ``token["data"]`` to parent

        :arg token: the comment token

        :arg parent: the node to append to; defaults to the current (last)
            open element

        """
        if parent is None:
            parent = self.openElements[-1]
        parent.appendChild(self.commentClass(token["data"]))

    def createElement(self, token):
        """Create an element but don't insert it anywhere"""
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        return element

    def _getInsertFromTable(self):
        return self._insertFromTable

    def _setInsertFromTable(self, value):
        """Switch the function used to insert an element from the
        normal one to the misnested table one and back again"""
        self._insertFromTable = value
        if value:
            self.insertElement = self.insertElementTable
        else:
            self.insertElement = self.insertElementNormal

    insertFromTable = property(_getInsertFromTable, _setInsertFromTable)

    def insertElementNormal(self, token):
        """Create an element, append it to the current open element and push
        it on the stack of open elements

        :arg token: the start tag token

        :returns: the new element

        """
        name = token["name"]
        assert isinstance(name, text_type), "Element %s not unicode" % name
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        self.openElements[-1].appendChild(element)
        self.openElements.append(element)
        return element

    def insertElementTable(self, token):
        """Create an element and insert it into the tree

        If the current open element is not one of ``tableInsertModeElements``
        this is the same as insertElementNormal; otherwise the element is
        placed at the position given by getTableMisnestedNodePosition.

        :arg token: the start tag token

        :returns: the new element

        """
        # Decide first, so that no throw-away element is built when the
        # normal insertion path (which creates its own element) is taken.
        if self.openElements[-1].name not in tableInsertModeElements:
            return self.insertElementNormal(token)

        element = self.createElement(token)
        # We should be in the InTable mode. This means we want to do
        # special magic element rearranging
        parent, insertBefore = self.getTableMisnestedNodePosition()
        if insertBefore is None:
            parent.appendChild(element)
        else:
            parent.insertBefore(element, insertBefore)
        self.openElements.append(element)
        return element

    def insertText(self, data, parent=None):
        """Insert text data.

        :arg data: the text to insert

        :arg parent: the node to insert into; defaults to the current (last)
            open element.  It is replaced by the foster parent when
            insertFromTable is set and the current open element is one of
            ``tableInsertModeElements``

        """
        if parent is None:
            parent = self.openElements[-1]

        if (not self.insertFromTable or
                self.openElements[-1].name not in tableInsertModeElements):
            parent.insertText(data)
        else:
            # We should be in the InTable mode. This means we want to do
            # special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            parent.insertText(data, insertBefore)

    def getTableMisnestedNodePosition(self):
        """Get the foster parent element, and sibling to insert before
        (or None) when inserting a misnested table node"""
        # The foster parent element is the one which comes before the most
        # recently opened table element
        # XXX - this is really inelegant
        lastTable = None
        fosterParent = None
        insertBefore = None
        for elm in reversed(self.openElements):
            if elm.name == "table":
                lastTable = elm
                break
        # Compare against None rather than testing truthiness: a node class
        # that defines __len__ (or __bool__) can be falsy while still being a
        # perfectly valid table or parent, e.g. a table with no children yet.
        if lastTable is not None:
            # XXX - we should really check that this parent is actually a
            # node here
            if lastTable.parent is not None:
                fosterParent = lastTable.parent
                insertBefore = lastTable
            else:
                fosterParent = self.openElements[
                    self.openElements.index(lastTable) - 1]
        else:
            fosterParent = self.openElements[0]
        return fosterParent, insertBefore

    def generateImpliedEndTags(self, exclude=None):
        """Pop open elements while the current one has an implied end tag

        Raises IndexError if the stack of open elements is, or becomes, empty.

        :arg exclude: an element name at which popping stops even though it
            would otherwise be closed

        """
        # XXX td, th and tr are not actually needed
        implied = frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
        # XXX This is not entirely what the specification says. We should
        # investigate it more closely.
        # Iterative on purpose: the depth of the stack is controlled by the
        # input document, so recursing once per popped element could exceed
        # the interpreter's recursion limit.
        while True:
            name = self.openElements[-1].name
            if name not in implied or name == exclude:
                break
            self.openElements.pop()

    def getDocument(self):
        """Return the final tree"""
        return self.document

    def getFragment(self):
        """Return the final fragment"""
        # assert self.innerHTML
        fragment = self.fragmentClass()
        self.openElements[0].reparentChildren(fragment)
        return fragment

    def testSerializer(self, node):
        """Serialize the subtree of node in the format required by unit tests

        :arg node: the node from which to start serializing

        """
        raise NotImplementedError