Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/html5lib/html5parser.py: 94%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1518 statements  

1from __future__ import absolute_import, division, unicode_literals 

2from six import viewkeys 

3 

4from . import _inputstream 

5from . import _tokenizer 

6 

7from . import treebuilders 

8from .treebuilders.base import Marker 

9 

10from . import _utils 

11from .constants import ( 

12 spaceCharacters, asciiUpper2Lower, 

13 specialElements, headingElements, cdataElements, rcdataElements, 

14 tokenTypes, 

15 namespaces, 

16 htmlIntegrationPointElements, mathmlTextIntegrationPointElements, 

17 adjustForeignAttributes as adjustForeignAttributesMap, 

18 adjustMathMLAttributes, adjustSVGAttributes, 

19 E, 

20 _ReparseException 

21) 

22 

23 

def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML document as a string or file-like object into a tree

    :arg doc: the document to parse as a string or file-like object

    :arg treebuilder: the treebuilder to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :returns: parsed tree

    Any extra keyword arguments are forwarded to ``HTMLParser.parse``.

    Example:

    >>> from html5lib.html5parser import parse
    >>> parse('<html><body><p>This is a doc</p></body></html>')
    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

    """
    # Resolve the treebuilder name to a class, then hand it to a one-shot parser.
    builder = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, **kwargs)

45 

46 

def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML fragment as a string or file-like object into a tree

    :arg doc: the fragment to parse as a string or file-like object

    :arg container: the container context to parse the fragment in

    :arg treebuilder: the treebuilder to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :returns: parsed tree

    Any extra keyword arguments are forwarded to ``HTMLParser.parseFragment``.

    Example:

    >>> from html5lib.html5parser import parseFragment
    >>> parseFragment('<b>this is a fragment</b>')
    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

    """
    tb = treebuilders.getTreeBuilder(treebuilder)
    p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
    return p.parseFragment(doc, container=container, **kwargs)

70 

71 

class HTMLParser(object):
    """HTML parser

    Generates a tree structure from a stream of (possibly malformed) HTML.

    """

    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        :arg tree: a treebuilder class controlling the type of tree that will be
            returned. Built in treebuilders can be accessed through
            html5lib.treebuilders.getTreeBuilder(treeType)

        :arg strict: raise an exception when a parse error is encountered

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        :arg debug: whether or not to enable debug mode which logs things

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()                     # generates parser with etree builder
        >>> parser = HTMLParser('lxml', strict=True)  # generates parser with lxml builder which is strict

        """

        # Raise an exception on the first error encountered
        self.strict = strict
        self.debug = debug

        # Accept a treebuilder class, a treebuilder name, or nothing (etree).
        if tree is None:
            tree = treebuilders.getTreeBuilder("etree")
        elif isinstance(tree, str):
            tree = treebuilders.getTreeBuilder(tree)

        self.tree = tree(namespaceHTMLElements)
        self.errors = []

        # One instance of every phase class, keyed by phase name.
        self.phases = {name: cls(self, self.tree) for name, cls in
                       _phases.items()}

    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
        """Tokenize *stream* and run the main loop, restarting once on reparse.

        Extra keyword arguments are passed to the tokenizer.
        """
        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

        try:
            self.mainLoop()
        except _ReparseException:
            # Start over from a clean state and run the loop a second time.
            self.reset()
            self.mainLoop()

    def reset(self):
        """Reset the tree and all per-parse state, then select the first phase."""
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if self.innerHTMLMode:
            self.innerHTML = self.container.lower()

            # NOTE(review): the constant names and the tokenizer states they
            # select look crossed (cdata -> rcdata, rcdata -> rawtext); kept
            # exactly as found -- confirm against the constants module.
            if self.innerHTML in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif self.innerHTML == 'plaintext':
                self.tokenizer.state = self.tokenizer.plaintextState
            else:
                # state already is data state
                pass
            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False  # pylint:disable=redefined-variable-type
            self.phase = self.phases["initial"]

        self.lastPhase = None

        self.beforeRCDataPhase = None

        self.framesetOK = True

    @property
    def documentEncoding(self):
        """Name of the character encoding that was used to decode the input stream, or
        :obj:`None` if that is not determined yet

        """
        if not hasattr(self, 'tokenizer'):
            return None
        return self.tokenizer.stream.charEncoding[0].name

    def isHTMLIntegrationPoint(self, element):
        """Return whether *element* is an HTML integration point."""
        if (element.name == "annotation-xml" and
                element.namespace == namespaces["mathml"]):
            # MathML annotation-xml qualifies only with an HTML-ish encoding
            # attribute, compared ASCII case-insensitively.
            return ("encoding" in element.attributes and
                    element.attributes["encoding"].translate(
                        asciiUpper2Lower) in
                    ("text/html", "application/xhtml+xml"))
        else:
            return (element.namespace, element.name) in htmlIntegrationPointElements

    def isMathMLTextIntegrationPoint(self, element):
        """Return whether *element* is a MathML text integration point."""
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

    def mainLoop(self):
        """Feed every token to the appropriate phase, then process EOF.

        A phase handler may return a token, in which case that token is
        processed again (possibly by a different phase) until a handler
        returns ``None``.
        """
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

        type_names = {value: key for key, value in tokenTypes.items()}
        debug = self.debug
        # Start tags that stay in foreign content at a MathML text
        # integration point; built once rather than on every iteration.
        mathmlTextExceptions = frozenset(["mglyph", "malignmark"])

        for token in self.tokenizer:
            prev_token = None
            new_token = token
            while new_token is not None:
                prev_token = new_token
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                tokenType = new_token["type"]

                if tokenType == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                else:
                    # Choose between the current phase and foreign content.
                    # The tag name is read from new_token, the same token the
                    # type was read from, so a reprocessed token is judged on
                    # its own name.
                    if (len(self.tree.openElements) == 0 or
                        currentNodeNamespace == self.tree.defaultNamespace or
                        (self.isMathMLTextIntegrationPoint(currentNode) and
                         ((tokenType == StartTagToken and
                           new_token["name"] not in mathmlTextExceptions) or
                          tokenType in (CharactersToken, SpaceCharactersToken))) or
                        (currentNodeNamespace == namespaces["mathml"] and
                         currentNodeName == "annotation-xml" and
                         tokenType == StartTagToken and
                         new_token["name"] == "svg") or
                        (self.isHTMLIntegrationPoint(currentNode) and
                         tokenType in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                        phase = self.phase
                    else:
                        phase = self.phases["inForeignContent"]

                    if debug:
                        info = {"type": type_names[tokenType]}
                        if tokenType in (StartTagToken, EndTagToken):
                            info["name"] = new_token['name']

                        self.log.append((self.tokenizer.state.__name__,
                                         self.phase.__class__.__name__,
                                         phase.__class__.__name__,
                                         "process" + info["type"],
                                         info))

                    if tokenType == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif tokenType == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif tokenType == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif tokenType == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif tokenType == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif tokenType == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

                # A self-closing start tag that no handler acknowledged is a
                # parse error.
                if (tokenType == StartTagToken and prev_token["selfClosing"] and
                        not prev_token["selfClosingAcknowledged"]):
                    self.parseError("non-void-element-with-trailing-solidus",
                                    {"name": prev_token["name"]})

        # When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                # Guard against cycling through the same phase twice at EOF.
                assert self.phase not in phases

    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element).

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

        """
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

    def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

        :arg container: name of the element we're setting the innerHTML
            property on; defaults to 'div'

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element)

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parseFragment('<b>this is a fragment</b>')
        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

        """
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error; raise ``ParseError`` when in strict mode.

        Each entry appended to ``self.errors`` is a tuple of
        ``(stream position, errorcode, datavars)``.
        """
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

    def adjustMathMLAttributes(self, token):
        """Apply the MathML attribute adjustments to *token*."""
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        """Apply the SVG attribute adjustments to *token*."""
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        """Apply the foreign (namespaced) attribute adjustments to *token*."""
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        # pylint:disable=unused-argument
        # NOTE(review): nothing in this class assigns ``self.parser``, so this
        # looks like it would raise AttributeError if called. Left untouched
        # because the intended behaviour is unclear -- confirm it is dead code.
        self.parser.phase()

    def resetInsertionMode(self):
        """Set ``self.phase`` from the stack of open elements."""
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        # Walk from the innermost open element outwards.
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                # Reached the root: in fragment mode the container name
                # stands in for it.
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

            # Elements outside the default namespace never pick the mode.
            if not last and node.namespace != self.tree.defaultNamespace:
                continue

            if nodeName in newModes:
                new_phase = self.phases[newModes[nodeName]]
                break
            elif last:
                new_phase = self.phases["inBody"]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        """Insert *token* and switch tokenizer and parser to text handling.

        :arg contentType: either ``"RAWTEXT"`` or ``"RCDATA"``
        """
        # Generic RCDATA/RAWTEXT Parsing algorithm
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        if contentType == "RAWTEXT":
            self.tokenizer.state = self.tokenizer.rawtextState
        else:
            self.tokenizer.state = self.tokenizer.rcdataState

        # Remember the current phase before switching to the text phase.
        self.originalPhase = self.phase

        self.phase = self.phases["text"]

398 

399 

class Phase(object):
    """Base class for helper object that implements each phase of processing

    Subclasses supply ``startTagHandler`` and ``endTagHandler`` lookup tables;
    the start/end tag entry points below resolve a handler by tag name and
    memoise the result per instance.
    """
    __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")

    def __init__(self, parser, tree):
        self.parser = parser
        self.tree = tree
        self.__startTagCache = {}
        self.__endTagCache = {}

    def processEOF(self):
        """Handle end of input; every concrete phase must override this."""
        raise NotImplementedError

    def processComment(self, token):
        """Attach the comment to the current (innermost open) element."""
        # For most phases the following is correct. Where it's not it will be
        # overridden.
        currentNode = self.tree.openElements[-1]
        self.tree.insertComment(token, currentNode)

    def processDoctype(self, token):
        """Report a doctype seen in a phase that does not expect one."""
        self.parser.parseError("unexpected-doctype")

    def processCharacters(self, token):
        """Insert the token's character data as text."""
        self.tree.insertText(token["data"])

    def processSpaceCharacters(self, token):
        """Insert the token's whitespace as text."""
        self.tree.insertText(token["data"])

    def processStartTag(self, token):
        """Dispatch a start tag to its handler, caching the lookup by name."""
        # Note the caching is done here rather than BoundMethodDispatcher as doing it there
        # requires a circular reference to the Phase, and this ends up with a significant
        # (CPython 2.7, 3.8) GC cost when parsing many short inputs
        tagName = token["name"]
        cache = self.__startTagCache
        # In Py2, using `in` is quicker in general than try/except KeyError
        # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
        if name_cached(cache, tagName):
            return cache[tagName](token)

        handler = cache[tagName] = self.startTagHandler[tagName]
        # bound the cache size in case we get loads of unknown tags
        limit = len(self.startTagHandler) * 1.1
        while len(cache) > limit:
            # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
            cache.pop(next(iter(cache)))
        return handler(token)

    def startTagHtml(self, token):
        """Merge attributes of a repeated ``<html>`` tag into the root element."""
        if not self.parser.firstStartTag and token["name"] == "html":
            self.parser.parseError("non-html-root")
        # XXX Need a check here to see if the first start tag token emitted is
        # this token... If it's not, invoke self.parser.parseError().
        rootAttributes = self.tree.openElements[0].attributes
        for attr, value in token["data"].items():
            # Existing attributes on the root win; only new ones are added.
            if attr not in rootAttributes:
                self.tree.openElements[0].attributes[attr] = value
        self.parser.firstStartTag = False

    def processEndTag(self, token):
        """Dispatch an end tag to its handler, caching the lookup by name."""
        # Note the caching is done here rather than BoundMethodDispatcher as doing it there
        # requires a circular reference to the Phase, and this ends up with a significant
        # (CPython 2.7, 3.8) GC cost when parsing many short inputs
        tagName = token["name"]
        cache = self.__endTagCache
        # In Py2, using `in` is quicker in general than try/except KeyError
        # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
        if name_cached(cache, tagName):
            return cache[tagName](token)

        handler = cache[tagName] = self.endTagHandler[tagName]
        # bound the cache size in case we get loads of unknown tags
        limit = len(self.endTagHandler) * 1.1
        while len(cache) > limit:
            # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
            cache.pop(next(iter(cache)))
        return handler(token)


def name_cached(cache, tagName):
    """Return whether *tagName* already has a memoised handler in *cache*."""
    return tagName in cache

471 

472 

class InitialPhase(Phase):
    """Phase in effect before any doctype or content has been processed.

    A doctype selects the document's compat mode; anything else other than
    whitespace or a comment forces quirks mode and is reprocessed in the
    "beforeHtml" phase.
    """
    __slots__ = tuple()

    def processSpaceCharacters(self, token):
        """Ignore whitespace."""
        pass

    def processComment(self, token):
        """Attach the comment directly to the document."""
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        """Insert the doctype and derive the compat mode from its identifiers."""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]
        correct = token["correct"]

        if (name != "html" or publicId is not None or
                systemId is not None and systemId != "about:legacy-compat"):
            self.parser.parseError("unknown-doctype")

        if publicId is None:
            publicId = ""

        self.tree.insertDoctype(token)

        # Public identifiers are matched ASCII case-insensitively.
        if publicId != "":
            publicId = publicId.translate(asciiUpper2Lower)

        # Public identifier prefixes that always select quirks mode.
        quirksPrefixes = (
            "+//silmaril//dtd html pro v0r11 19970101//",
            "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
            "-//as//dtd html 3.0 aswedit + extensions//",
            "-//ietf//dtd html 2.0 level 1//",
            "-//ietf//dtd html 2.0 level 2//",
            "-//ietf//dtd html 2.0 strict level 1//",
            "-//ietf//dtd html 2.0 strict level 2//",
            "-//ietf//dtd html 2.0 strict//",
            "-//ietf//dtd html 2.0//",
            "-//ietf//dtd html 2.1e//",
            "-//ietf//dtd html 3.0//",
            "-//ietf//dtd html 3.2 final//",
            "-//ietf//dtd html 3.2//",
            "-//ietf//dtd html 3//",
            "-//ietf//dtd html level 0//",
            "-//ietf//dtd html level 1//",
            "-//ietf//dtd html level 2//",
            "-//ietf//dtd html level 3//",
            "-//ietf//dtd html strict level 0//",
            "-//ietf//dtd html strict level 1//",
            "-//ietf//dtd html strict level 2//",
            "-//ietf//dtd html strict level 3//",
            "-//ietf//dtd html strict//",
            "-//ietf//dtd html//",
            "-//metrius//dtd metrius presentational//",
            "-//microsoft//dtd internet explorer 2.0 html strict//",
            "-//microsoft//dtd internet explorer 2.0 html//",
            "-//microsoft//dtd internet explorer 2.0 tables//",
            "-//microsoft//dtd internet explorer 3.0 html strict//",
            "-//microsoft//dtd internet explorer 3.0 html//",
            "-//microsoft//dtd internet explorer 3.0 tables//",
            "-//netscape comm. corp.//dtd html//",
            "-//netscape comm. corp.//dtd strict html//",
            "-//o'reilly and associates//dtd html 2.0//",
            "-//o'reilly and associates//dtd html extended 1.0//",
            "-//o'reilly and associates//dtd html extended relaxed 1.0//",
            "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
            "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
            "-//spyglass//dtd html 2.0 extended//",
            "-//sq//dtd html 2.0 hotmetal + extensions//",
            "-//sun microsystems corp.//dtd hotjava html//",
            "-//sun microsystems corp.//dtd hotjava strict html//",
            "-//w3c//dtd html 3 1995-03-24//",
            "-//w3c//dtd html 3.2 draft//",
            "-//w3c//dtd html 3.2 final//",
            "-//w3c//dtd html 3.2//",
            "-//w3c//dtd html 3.2s draft//",
            "-//w3c//dtd html 4.0 frameset//",
            "-//w3c//dtd html 4.0 transitional//",
            "-//w3c//dtd html experimental 19960712//",
            "-//w3c//dtd html experimental 970421//",
            "-//w3c//dtd w3 html//",
            "-//w3o//dtd w3 html 3.0//",
            "-//webtechs//dtd mozilla html 2.0//",
            "-//webtechs//dtd mozilla html//",
        )
        # Public identifiers that select quirks mode only on an exact match.
        quirksExact = (
            "-//w3o//dtd w3 html strict 3.0//en//",
            "-/w3c/dtd html 4.0 transitional/en",
            "html",
        )
        # HTML 4.01 loose doctypes: quirks without a system identifier,
        # limited quirks with one.
        html401Prefixes = (
            "-//w3c//dtd html 4.01 frameset//",
            "-//w3c//dtd html 4.01 transitional//",
        )
        xhtml10Prefixes = (
            "-//w3c//dtd xhtml 1.0 frameset//",
            "-//w3c//dtd xhtml 1.0 transitional//",
        )

        if (not correct or token["name"] != "html" or
                publicId.startswith(quirksPrefixes) or
                publicId in quirksExact or
                (publicId.startswith(html401Prefixes) and systemId is None) or
                (systemId and
                 systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")):
            self.parser.compatMode = "quirks"
        elif (publicId.startswith(xhtml10Prefixes) or
              (publicId.startswith(html401Prefixes) and systemId is not None)):
            self.parser.compatMode = "limited quirks"

        self.parser.phase = self.parser.phases["beforeHtml"]

    def anythingElse(self):
        """Fall back to quirks mode and move on to the "beforeHtml" phase."""
        self.parser.compatMode = "quirks"
        self.parser.phase = self.parser.phases["beforeHtml"]

    def processCharacters(self, token):
        """Report the missing doctype and hand the token back for reprocessing."""
        self.parser.parseError("expected-doctype-but-got-chars")
        self.anythingElse()
        return token

    def processStartTag(self, token):
        """Report the missing doctype and hand the token back for reprocessing."""
        self.parser.parseError("expected-doctype-but-got-start-tag",
                               {"name": token["name"]})
        self.anythingElse()
        return token

    def processEndTag(self, token):
        """Report the missing doctype and hand the token back for reprocessing."""
        self.parser.parseError("expected-doctype-but-got-end-tag",
                               {"name": token["name"]})
        self.anythingElse()
        return token

    def processEOF(self):
        """Report the missing doctype and ask for EOF to be reprocessed."""
        self.parser.parseError("expected-doctype-but-got-eof")
        self.anythingElse()
        return True

602 

603 

class BeforeHtmlPhase(Phase):
    """Phase that creates the root ``html`` element when it is first needed."""
    __slots__ = tuple()

    # helper methods
    def insertHtmlElement(self):
        """Insert an implied ``<html>`` root and move to the "beforeHead" phase."""
        rootToken = impliedTagToken("html", "StartTag")
        self.tree.insertRoot(rootToken)
        self.parser.phase = self.parser.phases["beforeHead"]

    # other
    def processEOF(self):
        """Create the root, then ask for EOF to be reprocessed."""
        self.insertHtmlElement()
        return True

    def processComment(self, token):
        """Attach the comment directly to the document."""
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        """Ignore whitespace."""
        pass

    def processCharacters(self, token):
        """Create the root and hand the token back for reprocessing."""
        self.insertHtmlElement()
        return token

    def processStartTag(self, token):
        """Create the root and hand the token back for reprocessing."""
        # Note that an explicit <html> tag was the first start tag seen.
        if token["name"] == "html":
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        return token

    def processEndTag(self, token):
        """Imply the root for head/body/html/br end tags; reject any other."""
        if token["name"] in ("head", "body", "html", "br"):
            self.insertHtmlElement()
            return token
        self.parser.parseError("unexpected-end-tag-before-html",
                               {"name": token["name"]})

640 

641 

class BeforeHeadPhase(Phase):
    """Phase between the root element and the ``head`` element.

    Anything that is not whitespace, ``<html>`` or ``<head>`` implies a
    ``<head>`` start tag and is then reprocessed.
    """
    __slots__ = tuple()

    def processEOF(self):
        """Imply ``<head>``, then ask for EOF to be reprocessed."""
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return True

    def processSpaceCharacters(self, token):
        """Ignore whitespace."""
        pass

    def processCharacters(self, token):
        """Imply ``<head>`` and hand the token back for reprocessing."""
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return token

    def startTagHtml(self, token):
        """Delegate ``<html>`` to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        """Insert the head element, record it, and enter the "inHead" phase."""
        self.tree.insertElement(token)
        self.tree.headPointer = self.tree.openElements[-1]
        self.parser.phase = self.parser.phases["inHead"]

    def startTagOther(self, token):
        """Imply ``<head>`` and hand the token back for reprocessing."""
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return token

    def endTagImplyHead(self, token):
        """Imply ``<head>`` and hand the end tag back for reprocessing."""
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        self.parser.parseError("end-tag-after-implied-root",
                               {"name": token["name"]})

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("head", startTagHead)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("head", "body", "html", "br"), endTagImplyHead)
    ])
    endTagHandler.default = endTagOther

686 

687 

class InHeadPhase(Phase):
    """Phase for content inside the ``head`` element."""
    __slots__ = tuple()

    # the real thing
    def processEOF(self):
        """Close the head, then ask for EOF to be reprocessed."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Close the head and hand the token back for reprocessing."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Delegate ``<html>`` to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        """Report a second ``<head>`` as a parse error and drop it."""
        self.parser.parseError("two-heads-are-not-better-than-one")

    def startTagBaseLinkCommand(self, token):
        """Insert the element and immediately pop it again."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMeta(self, token):
        """Insert a ``<meta>`` element and act on any encoding it declares."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

        attributes = token["data"]
        stream = self.parser.tokenizer.stream
        # Only a tentative encoding may be changed by a declaration.
        if stream.charEncoding[1] != "tentative":
            return

        if "charset" in attributes:
            stream.changeEncoding(attributes["charset"])
        elif ("content" in attributes and
              "http-equiv" in attributes and
              attributes["http-equiv"].lower() == "content-type"):
            # Encoding it as UTF-8 here is a hack, as really we should pass
            # the abstract Unicode string, and just use the
            # ContentAttrParser on that, but using UTF-8 allows all chars
            # to be encoded and as a ASCII-superset works.
            contentBytes = attributes["content"].encode("utf-8")
            data = _inputstream.EncodingBytes(contentBytes)
            codec = _inputstream.ContentAttrParser(data).parse()
            stream.changeEncoding(codec)

    def startTagTitle(self, token):
        """Parse ``<title>`` content as RCDATA."""
        self.parser.parseRCDataRawtext(token, "RCDATA")

    def startTagNoFramesStyle(self, token):
        """Parse ``<noframes>`` / ``<style>`` content as RAWTEXT."""
        # Need to decide whether to implement the scripting-disabled case
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagNoscript(self, token):
        """Handle ``<noscript>`` according to the parser's scripting flag."""
        if not self.parser.scripting:
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inHeadNoscript"]
        else:
            self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagScript(self, token):
        """Insert ``<script>`` and switch tokenizer and parser to script text."""
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.scriptDataState
        # Remember the current phase before switching to the text phase.
        self.parser.originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["text"]

    def startTagOther(self, token):
        """Close the head and hand the token back for reprocessing."""
        self.anythingElse()
        return token

    def endTagHead(self, token):
        """Pop the head element and move to the "afterHead" phase."""
        node = self.parser.tree.openElements.pop()
        assert node.name == "head", "Expected head got %s" % node.name
        self.parser.phase = self.parser.phases["afterHead"]

    def endTagHtmlBodyBr(self, token):
        """Close the head and hand the end tag back for reprocessing."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Act as if a ``</head>`` end tag had been seen."""
        self.endTagHead(impliedTagToken("head"))

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("title", startTagTitle),
        (("noframes", "style"), startTagNoFramesStyle),
        ("noscript", startTagNoscript),
        ("script", startTagScript),
        (("base", "basefont", "bgsound", "command", "link"),
         startTagBaseLinkCommand),
        ("meta", startTagMeta),
        ("head", startTagHead)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("head", endTagHead),
        (("br", "html", "body"), endTagHtmlBodyBr)
    ])
    endTagHandler.default = endTagOther

789 

790 

class InHeadNoscriptPhase(Phase):
    """Phase for content inside a ``noscript`` element in the head."""
    __slots__ = tuple()

    def processEOF(self):
        """Report the error, close ``noscript``, and ask for EOF reprocessing."""
        self.parser.parseError("eof-in-head-noscript")
        self.anythingElse()
        return True

    def processComment(self, token):
        """Delegate comments to the "inHead" phase."""
        inHead = self.parser.phases["inHead"]
        return inHead.processComment(token)

    def processCharacters(self, token):
        """Report the error, close ``noscript``, and reprocess the token."""
        self.parser.parseError("char-in-head-noscript")
        self.anythingElse()
        return token

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the "inHead" phase."""
        inHead = self.parser.phases["inHead"]
        return inHead.processSpaceCharacters(token)

    def startTagHtml(self, token):
        """Delegate ``<html>`` to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagBaseLinkCommand(self, token):
        """Delegate head-level start tags to the "inHead" phase."""
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagHeadNoscript(self, token):
        """Report a nested ``<head>`` / ``<noscript>`` and drop it."""
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

    def startTagOther(self, token):
        """Report the error, close ``noscript``, and reprocess the token."""
        self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
        self.anythingElse()
        return token

    def endTagNoscript(self, token):
        """Pop the ``noscript`` element and return to the "inHead" phase."""
        node = self.parser.tree.openElements.pop()
        assert node.name == "noscript", "Expected noscript got %s" % node.name
        self.parser.phase = self.parser.phases["inHead"]

    def endTagBr(self, token):
        """Report the error, close ``noscript``, and reprocess the end tag."""
        self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Act as if a ``</noscript>`` end tag had been seen."""
        # Caller must raise parse error first!
        self.endTagNoscript(impliedTagToken("noscript"))

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
        (("head", "noscript"), startTagHeadNoscript),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("noscript", endTagNoscript),
        ("br", endTagBr),
    ])
    endTagHandler.default = endTagOther

853 

854 

class AfterHeadPhase(Phase):
    """Phase between the end of ``head`` and the start of body or frameset."""
    __slots__ = tuple()

    def processEOF(self):
        """Imply ``<body>``, then ask for EOF to be reprocessed."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Imply ``<body>`` and hand the token back for reprocessing."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Delegate ``<html>`` to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagBody(self, token):
        """Insert an explicit ``<body>`` and enter the "inBody" phase."""
        self.parser.framesetOK = False
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inBody"]

    def startTagFrameset(self, token):
        """Insert ``<frameset>`` and enter the "inFrameset" phase."""
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inFrameset"]

    def startTagFromHead(self, token):
        """Process a head-only element that appeared after ``</head>``."""
        self.parser.parseError("unexpected-start-tag-out-of-my-head",
                               {"name": token["name"]})
        openElements = self.tree.openElements
        # Temporarily reopen the head so "inHead" inserts into it.
        openElements.append(self.tree.headPointer)
        self.parser.phases["inHead"].processStartTag(token)
        # Take the innermost head element back off the stack.
        for node in self.tree.openElements[::-1]:
            if node.name == "head":
                self.tree.openElements.remove(node)
                break

    def startTagHead(self, token):
        """Report a stray ``<head>`` as a parse error and drop it."""
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

    def startTagOther(self, token):
        """Imply ``<body>`` and hand the token back for reprocessing."""
        self.anythingElse()
        return token

    def endTagHtmlBodyBr(self, token):
        """Imply ``<body>`` and hand the end tag back for reprocessing."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Insert an implied ``<body>`` and enter the "inBody" phase."""
        bodyToken = impliedTagToken("body", "StartTag")
        self.tree.insertElement(bodyToken)
        self.parser.phase = self.parser.phases["inBody"]
        self.parser.framesetOK = True

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("body", startTagBody),
        ("frameset", startTagFrameset),
        (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
          "style", "title"),
         startTagFromHead),
        ("head", startTagHead)
    ])
    startTagHandler.default = startTagOther
    endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
                                              endTagHtmlBodyBr)])
    endTagHandler.default = endTagOther

920 

921 

class InBodyPhase(Phase):
    """Handlers for tokens seen while the parser is in the "in body" mode.

    See http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody

    ``processSpaceCharacters`` is stored per instance (hence the slot) so
    that it can be switched between ``processSpaceCharactersNonPre`` and
    ``processSpaceCharactersDropNewline`` while parsing.
    """
    __slots__ = ("processSpaceCharacters",)

    def __init__(self, *args, **kwargs):
        super(InBodyPhase, self).__init__(*args, **kwargs)
        # Start out with the ordinary whitespace handler.
        self.processSpaceCharacters = self.processSpaceCharactersNonPre

    def isMatchingFormattingElement(self, node1, node2):
        """Tell whether two nodes agree on name, namespace and attributes."""
        if node1.name != node2.name:
            return False
        if node1.namespace != node2.namespace:
            return False
        return node1.attributes == node2.attributes

    # helper
    def addFormattingElement(self, token):
        """Insert the element for ``token`` and record it as active formatting.

        Entries after the last ``Marker`` that match the new element are
        counted; when three already exist, the earliest of them is dropped
        before the new element is appended.
        """
        self.tree.insertElement(token)
        newElement = self.tree.openElements[-1]

        duplicates = []
        for candidate in self.tree.activeFormattingElements[::-1]:
            if candidate is Marker:
                break
            if self.isMatchingFormattingElement(candidate, newElement):
                duplicates.append(candidate)

        assert len(duplicates) <= 3
        if len(duplicates) == 3:
            # The list was walked backwards, so the last hit is the oldest.
            self.tree.activeFormattingElements.remove(duplicates[-1])
        self.tree.activeFormattingElements.append(newElement)

    # the real deal
    def processEOF(self):
        """Report a parse error if an element that needs a closing tag is
        still open at end of file."""
        mayStayOpen = frozenset(("dd", "dt", "li", "p", "tbody", "td",
                                 "tfoot", "th", "thead", "tr", "body",
                                 "html"))
        if any(node.name not in mayStayOpen
               for node in reversed(self.tree.openElements)):
            self.parser.parseError("expected-closing-tag-but-got-eof")
        # Stop parsing

    def processSpaceCharactersDropNewline(self, token):
        """Whitespace handler installed right after <pre>, <listing> and
        <textarea> start tags: a single leading newline is dropped.

        The handler uninstalls itself on first use.
        """
        text = token["data"]
        self.processSpaceCharacters = self.processSpaceCharactersNonPre
        if text.startswith("\n"):
            current = self.tree.openElements[-1]
            if (current.name in ("pre", "listing", "textarea") and
                    not current.hasContent()):
                text = text[1:]
        if text:
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertText(text)

    def processCharacters(self, token):
        """Insert character data; non-space characters clear ``framesetOK``."""
        text = token["data"]
        if text == "\u0000":
            # The tokenizer should always emit null on its own
            return
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertText(text)
        # This must be bad for performance
        if (self.parser.framesetOK and
                any(char not in spaceCharacters for char in text)):
            self.parser.framesetOK = False

    def processSpaceCharactersNonPre(self, token):
        """Default whitespace handler: insert the text unchanged."""
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertText(token["data"])

    def startTagProcessInHead(self, token):
        """Delegate the start tag to the "inHead" phase."""
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagBody(self, token):
        """A stray <body>: copy attributes the existing body lacks."""
        self.parser.parseError("unexpected-start-tag", {"name": "body"})
        stack = self.tree.openElements
        if len(stack) == 1 or stack[1].name != "body":
            assert self.parser.innerHTML
        else:
            self.parser.framesetOK = False
            body = stack[1]
            for attrName, attrValue in token["data"].items():
                if attrName not in body.attributes:
                    body.attributes[attrName] = attrValue

    def startTagFrameset(self, token):
        """A <frameset> in body: replace the body while ``framesetOK`` holds."""
        self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
        stack = self.tree.openElements
        if len(stack) == 1 or stack[1].name != "body":
            assert self.parser.innerHTML
        elif self.parser.framesetOK:
            body = stack[1]
            if body.parent:
                body.parent.removeChild(body)
            while stack[-1].name != "html":
                stack.pop()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inFrameset"]

    def startTagCloseP(self, token):
        """Close an open <p> in button scope, then insert the element."""
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        self.tree.insertElement(token)

    def startTagPreListing(self, token):
        """Like ``startTagCloseP``, and arm the drop-leading-newline handler."""
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        self.tree.insertElement(token)
        self.parser.framesetOK = False
        self.processSpaceCharacters = self.processSpaceCharactersDropNewline

    def startTagForm(self, token):
        """Insert a <form> unless a form pointer is already set."""
        if self.tree.formPointer:
            self.parser.parseError("unexpected-start-tag", {"name": "form"})
            return
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        self.tree.insertElement(token)
        self.tree.formPointer = self.tree.openElements[-1]

    def startTagListItem(self, token):
        """Handle <li>, <dd> and <dt>: close a previous item first."""
        self.parser.framesetOK = False

        closesOn = {"li": ["li"],
                    "dt": ["dt", "dd"],
                    "dd": ["dt", "dd"]}[token["name"]]
        for node in reversed(self.tree.openElements):
            if node.name in closesOn:
                self.parser.phase.processEndTag(
                    impliedTagToken(node.name, "EndTag"))
                break
            # Special elements other than these three stop the search.
            if (node.nameTuple in specialElements and
                    node.name not in ("address", "div", "p")):
                break

        if self.tree.elementInScope("p", variant="button"):
            self.parser.phase.processEndTag(
                impliedTagToken("p", "EndTag"))

        self.tree.insertElement(token)

    def startTagPlaintext(self, token):
        """Insert <plaintext> and switch the tokenizer to plaintext state."""
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.plaintextState

    def startTagHeading(self, token):
        """Insert a heading; a heading that is the current node is popped."""
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        if self.tree.openElements[-1].name in headingElements:
            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
            self.tree.openElements.pop()
        self.tree.insertElement(token)

    def startTagA(self, token):
        """Insert <a>, first closing any <a> still in the active formatting
        elements."""
        previousA = self.tree.elementInActiveFormattingElements("a")
        if previousA:
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "a", "endName": "a"})
            self.endTagFormatting(impliedTagToken("a"))
            # Make sure the old element is gone from both lists.
            if previousA in self.tree.openElements:
                self.tree.openElements.remove(previousA)
            if previousA in self.tree.activeFormattingElements:
                self.tree.activeFormattingElements.remove(previousA)
        self.tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagFormatting(self, token):
        """Insert an ordinary formatting element."""
        self.tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagNobr(self, token):
        """Insert <nobr>, closing an in-scope <nobr> first."""
        self.tree.reconstructActiveFormattingElements()
        if self.tree.elementInScope("nobr"):
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "nobr", "endName": "nobr"})
            self.processEndTag(impliedTagToken("nobr"))
            # XXX Need tests that trigger the following
            self.tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagButton(self, token):
        """Insert <button>; an in-scope button is closed and the token
        handed back instead."""
        if self.tree.elementInScope("button"):
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "button", "endName": "button"})
            self.processEndTag(impliedTagToken("button"))
            return token
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        self.parser.framesetOK = False

    def startTagAppletMarqueeObject(self, token):
        """Insert the element and push a ``Marker`` onto the active
        formatting elements."""
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        self.tree.activeFormattingElements.append(Marker)
        self.parser.framesetOK = False

    def startTagXmp(self, token):
        """Handle <xmp> as a RAWTEXT element."""
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        self.tree.reconstructActiveFormattingElements()
        self.parser.framesetOK = False
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagTable(self, token):
        """Insert <table> and switch to "inTable".

        An open <p> is closed first, except in quirks mode.
        """
        if self.parser.compatMode != "quirks":
            if self.tree.elementInScope("p", variant="button"):
                self.processEndTag(impliedTagToken("p"))
        self.tree.insertElement(token)
        self.parser.framesetOK = False
        self.parser.phase = self.parser.phases["inTable"]

    def startTagVoidFormatting(self, token):
        """Insert a void element and pop it from the stack straight away."""
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True
        self.parser.framesetOK = False

    def startTagInput(self, token):
        """Insert <input>; ``type=hidden`` leaves ``framesetOK`` untouched."""
        savedFramesetOK = self.parser.framesetOK
        self.startTagVoidFormatting(token)
        if ("type" in token["data"] and
                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
            # input type=hidden doesn't change framesetOK
            self.parser.framesetOK = savedFramesetOK

    def startTagParamSource(self, token):
        """Insert <param>, <source> or <track> as a void element."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagHr(self, token):
        """Insert <hr> as a void element, closing an open <p> first."""
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True
        self.parser.framesetOK = False

    def startTagImage(self, token):
        """Treat <image> as <img>, after reporting a parse error."""
        # No really...
        self.parser.parseError("unexpected-start-tag-treated-as",
                               {"originalName": "image", "newName": "img"})
        replacement = impliedTagToken("img", "StartTag",
                                      attributes=token["data"],
                                      selfClosing=token["selfClosing"])
        self.processStartTag(replacement)

    def startTagIsIndex(self, token):
        """Expand <isindex> into a form, two rules, a label and an input.

        Nothing is generated when a form pointer is already set.
        """
        self.parser.parseError("deprecated-tag", {"name": "isindex"})
        if self.tree.formPointer:
            return
        formAttributes = {}
        if "action" in token["data"]:
            formAttributes["action"] = token["data"]["action"]
        self.processStartTag(impliedTagToken("form", "StartTag",
                                             attributes=formAttributes))
        self.processStartTag(impliedTagToken("hr", "StartTag"))
        self.processStartTag(impliedTagToken("label", "StartTag"))
        # XXX Localization ...
        if "prompt" in token["data"]:
            labelText = token["data"]["prompt"]
        else:
            labelText = "This is a searchable index. Enter search keywords: "
        self.processCharacters(
            {"type": tokenTypes["Characters"], "data": labelText})
        # The input gets every attribute except the two consumed above.
        inputAttributes = token["data"].copy()
        for consumed in ("action", "prompt"):
            if consumed in inputAttributes:
                del inputAttributes[consumed]
        inputAttributes["name"] = "isindex"
        self.processStartTag(impliedTagToken("input", "StartTag",
                                             attributes=inputAttributes,
                                             selfClosing=token["selfClosing"]))
        self.processEndTag(impliedTagToken("label"))
        self.processStartTag(impliedTagToken("hr", "StartTag"))
        self.processEndTag(impliedTagToken("form"))

    def startTagTextarea(self, token):
        """Insert <textarea> and switch the tokenizer to RCDATA state."""
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.rcdataState
        self.processSpaceCharacters = self.processSpaceCharactersDropNewline
        self.parser.framesetOK = False

    def startTagIFrame(self, token):
        """Handle <iframe> as RAWTEXT and clear ``framesetOK``."""
        self.parser.framesetOK = False
        self.startTagRawtext(token)

    def startTagNoscript(self, token):
        """<noscript> is RAWTEXT when scripting is on, ordinary otherwise."""
        if self.parser.scripting:
            self.startTagRawtext(token)
        else:
            self.startTagOther(token)

    def startTagRawtext(self, token):
        """iframe, noembed noframes, noscript(if scripting enabled)"""
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagOpt(self, token):
        """Insert <option>/<optgroup>, closing an <option> that is the
        current node."""
        if self.tree.openElements[-1].name == "option":
            self.parser.phase.processEndTag(impliedTagToken("option"))
        self.tree.reconstructActiveFormattingElements()
        self.parser.tree.insertElement(token)

    def startTagSelect(self, token):
        """Insert <select> and pick the matching select phase."""
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        self.parser.framesetOK = False
        phases = self.parser.phases
        tablePhases = (phases["inTable"],
                       phases["inCaption"],
                       phases["inColumnGroup"],
                       phases["inTableBody"],
                       phases["inRow"],
                       phases["inCell"])
        if self.parser.phase in tablePhases:
            self.parser.phase = phases["inSelectInTable"]
        else:
            self.parser.phase = phases["inSelect"]

    def startTagRpRt(self, token):
        """Insert <rp>/<rt>, generating implied end tags inside a <ruby>."""
        if self.tree.elementInScope("ruby"):
            self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != "ruby":
                self.parser.parseError()
        self.tree.insertElement(token)

    def startTagMath(self, token):
        """Insert a MathML root element with adjusted attributes."""
        self.tree.reconstructActiveFormattingElements()
        self.parser.adjustMathMLAttributes(token)
        self.parser.adjustForeignAttributes(token)
        token["namespace"] = namespaces["mathml"]
        self.tree.insertElement(token)
        # Need to get the parse error right for the case where the token
        # has a namespace not equal to the xmlns attribute
        if token["selfClosing"]:
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

    def startTagSvg(self, token):
        """Insert an SVG root element with adjusted attributes."""
        self.tree.reconstructActiveFormattingElements()
        self.parser.adjustSVGAttributes(token)
        self.parser.adjustForeignAttributes(token)
        token["namespace"] = namespaces["svg"]
        self.tree.insertElement(token)
        # Need to get the parse error right for the case where the token
        # has a namespace not equal to the xmlns attribute
        if token["selfClosing"]:
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

    def startTagMisplaced(self, token):
        """Ignore a start tag that only makes sense in another insertion
        mode, after reporting a parse error.

        Dispatched for "caption", "col", "colgroup", "frame", "head",
        "tbody", "td", "tfoot", "th", "thead" and "tr".
        """
        self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]})

    def startTagOther(self, token):
        """Insert any other element as an ordinary element."""
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)

    def endTagP(self, token):
        """Close the current <p>; an empty one is created when none is in
        button scope."""
        if not self.tree.elementInScope("p", variant="button"):
            self.startTagCloseP(impliedTagToken("p", "StartTag"))
            self.parser.parseError("unexpected-end-tag", {"name": "p"})
            self.endTagP(impliedTagToken("p", "EndTag"))
            return
        self.tree.generateImpliedEndTags("p")
        if self.tree.openElements[-1].name != "p":
            self.parser.parseError("unexpected-end-tag", {"name": "p"})
        popped = self.tree.openElements.pop()
        while popped.name != "p":
            popped = self.tree.openElements.pop()

    def endTagBody(self, token):
        """Switch to "afterBody", reporting elements that are still open."""
        if not self.tree.elementInScope("body"):
            self.parser.parseError()
            return
        if self.tree.openElements[-1].name != "body":
            mayStayOpen = frozenset(("dd", "dt", "li", "optgroup",
                                     "option", "p", "rp", "rt",
                                     "tbody", "td", "tfoot",
                                     "th", "thead", "tr", "body",
                                     "html"))
            offender = next(
                (node for node in self.tree.openElements[2:]
                 if node.name not in mayStayOpen),
                None)
            if offender is not None:
                # Not sure this is the correct name for the parse error
                self.parser.parseError(
                    "expected-one-end-tag-but-got-another",
                    {"gotName": "body", "expectedName": offender.name})
        self.parser.phase = self.parser.phases["afterBody"]

    def endTagHtml(self, token):
        """Act as for </body> when a body is in scope and hand the token
        back."""
        # We repeat the test for the body end tag token being ignored here
        if self.tree.elementInScope("body"):
            self.endTagBody(impliedTagToken("body"))
            return token

    def endTagBlock(self, token):
        """Close a block-level element named by the end tag."""
        tagName = token["name"]
        # Put us back in the right whitespace handling mode
        if tagName == "pre":
            self.processSpaceCharacters = self.processSpaceCharactersNonPre
        inScope = self.tree.elementInScope(tagName)
        if inScope:
            self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1].name != tagName:
            self.parser.parseError("end-tag-too-early", {"name": tagName})
        if inScope:
            popped = self.tree.openElements.pop()
            while popped.name != tagName:
                popped = self.tree.openElements.pop()

    def endTagForm(self, token):
        """Clear the form pointer and remove the form from the stack."""
        formNode = self.tree.formPointer
        self.tree.formPointer = None
        if formNode is None or not self.tree.elementInScope(formNode):
            self.parser.parseError("unexpected-end-tag",
                                   {"name": "form"})
            return
        self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1] != formNode:
            self.parser.parseError("end-tag-too-early-ignored",
                                   {"name": "form"})
        self.tree.openElements.remove(formNode)

    def endTagListItem(self, token):
        """Close <li>, <dd> or <dt>; <li> is looked up in list scope."""
        tagName = token["name"]
        variant = "list" if tagName == "li" else None
        if not self.tree.elementInScope(tagName, variant=variant):
            self.parser.parseError("unexpected-end-tag", {"name": tagName})
            return
        self.tree.generateImpliedEndTags(exclude=tagName)
        if self.tree.openElements[-1].name != tagName:
            self.parser.parseError(
                "end-tag-too-early",
                {"name": tagName})
        popped = self.tree.openElements.pop()
        while popped.name != tagName:
            popped = self.tree.openElements.pop()

    def endTagHeading(self, token):
        """Close the nearest open heading, whatever its level."""
        if any(self.tree.elementInScope(heading)
               for heading in headingElements):
            self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1].name != token["name"]:
            self.parser.parseError("end-tag-too-early", {"name": token["name"]})

        # The scope test is repeated on purpose: it runs again after the
        # implied end tags have been generated, as in the original flow.
        if any(self.tree.elementInScope(heading)
               for heading in headingElements):
            popped = self.tree.openElements.pop()
            while popped.name not in headingElements:
                popped = self.tree.openElements.pop()

    def endTagFormatting(self, token):
        """The much-feared adoption agency algorithm"""
        # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
        # XXX Better parseError messages appreciated.
        tree = self.tree
        tagName = token["name"]

        # Steps 1-3: the outer loop runs at most eight times.
        for _outerPass in range(8):

            # Step 4: find the last active formatting element with the
            # token's tag name (after the last scope marker, if any).
            formattingElement = tree.elementInActiveFormattingElements(
                tagName)
            if (not formattingElement or
                    (formattingElement in tree.openElements and
                     not tree.elementInScope(formattingElement.name))):
                # No usable node: act as for "any other end tag".
                self.endTagOther(token)
                return

            # Listed as formatting but no longer open: parse error, drop
            # it from the list and stop.
            elif formattingElement not in tree.openElements:
                self.parser.parseError("adoption-agency-1.2", {"name": tagName})
                tree.activeFormattingElements.remove(formattingElement)
                return

            # Open but not in scope: parse error, ignore the token.
            # NOTE(review): the first test above already handles "open but
            # not in scope", so this branch looks unreachable - confirm.
            elif not tree.elementInScope(formattingElement.name):
                self.parser.parseError("adoption-agency-4.4", {"name": tagName})
                return

            # Open and in scope; it is a parse error unless it is also the
            # current node, but the algorithm continues either way.
            else:
                if formattingElement != tree.openElements[-1]:
                    self.parser.parseError("adoption-agency-1.3", {"name": tagName})

            # Step 5: the furthest block is the first special element at or
            # below the formatting element in the stack, if there is one.
            afeIndex = tree.openElements.index(formattingElement)
            furthestBlock = None
            for element in tree.openElements[afeIndex:]:
                if element.nameTuple in specialElements:
                    furthestBlock = element
                    break

            # Step 6: without a furthest block, pop up to and including the
            # formatting element, unlist it, and stop.
            if furthestBlock is None:
                element = tree.openElements.pop()
                while element != formattingElement:
                    element = tree.openElements.pop()
                tree.activeFormattingElements.remove(element)
                return

            # Step 7
            commonAncestor = tree.openElements[afeIndex - 1]

            # Step 8: the bookmark records where the replacement formatting
            # element goes back into the active list. It can move in step
            # 9.7 below.
            bookmark = tree.activeFormattingElements.index(formattingElement)

            # Step 9: walk up the stack from the furthest block, at most
            # three times.
            lastNode = node = furthestBlock
            index = tree.openElements.index(node)
            for _innerPass in range(3):
                # Node is element before node in open elements
                index -= 1
                node = tree.openElements[index]
                if node not in tree.activeFormattingElements:
                    tree.openElements.remove(node)
                    continue
                # Step 9.6
                if node == formattingElement:
                    break
                # Step 9.7
                if lastNode == furthestBlock:
                    bookmark = tree.activeFormattingElements.index(node) + 1
                # Step 9.8: swap node for a clone in both lists.
                clone = node.cloneNode()
                tree.activeFormattingElements[
                    tree.activeFormattingElements.index(node)] = clone
                tree.openElements[
                    tree.openElements.index(node)] = clone
                node = clone
                # Step 9.9: move lastNode under the clone.
                if lastNode.parent:
                    lastNode.parent.removeChild(lastNode)
                node.appendChild(lastNode)
                # Step 9.10
                lastNode = node

            # Step 10: attach lastNode to the common ancestor, or foster
            # parent it when that ancestor is a table-structure element.
            if lastNode.parent:
                lastNode.parent.removeChild(lastNode)

            if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
                parent, insertBefore = tree.getTableMisnestedNodePosition()
                parent.insertBefore(lastNode, insertBefore)
            else:
                commonAncestor.appendChild(lastNode)

            # Step 11
            clone = formattingElement.cloneNode()

            # Step 12
            furthestBlock.reparentChildren(clone)

            # Step 13
            furthestBlock.appendChild(clone)

            # Step 14
            tree.activeFormattingElements.remove(formattingElement)
            tree.activeFormattingElements.insert(bookmark, clone)

            # Step 15
            tree.openElements.remove(formattingElement)
            tree.openElements.insert(
                tree.openElements.index(furthestBlock) + 1, clone)

    def endTagAppletMarqueeObject(self, token):
        """Close <applet>, <marquee> or <object> and clear the active
        formatting elements."""
        tagName = token["name"]
        if self.tree.elementInScope(tagName):
            self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1].name != tagName:
            self.parser.parseError("end-tag-too-early", {"name": tagName})

        if self.tree.elementInScope(tagName):
            popped = self.tree.openElements.pop()
            while popped.name != tagName:
                popped = self.tree.openElements.pop()
            self.tree.clearActiveFormattingElements()

    def endTagBr(self, token):
        """Treat </br> as a <br> element, after reporting a parse error."""
        self.parser.parseError("unexpected-end-tag-treated-as",
                               {"originalName": "br", "newName": "br element"})
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(impliedTagToken("br", "StartTag"))
        self.tree.openElements.pop()

    def endTagOther(self, token):
        """Close the nearest open element with the token's name, giving up
        at the first special element."""
        tagName = token["name"]
        for node in self.tree.openElements[::-1]:
            if node.name == tagName:
                self.tree.generateImpliedEndTags(exclude=tagName)
                if self.tree.openElements[-1].name != tagName:
                    self.parser.parseError("unexpected-end-tag", {"name": tagName})
                while self.tree.openElements.pop() != node:
                    pass
                break
            elif node.nameTuple in specialElements:
                self.parser.parseError("unexpected-end-tag", {"name": tagName})
                break

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("base", "basefont", "bgsound", "command", "link", "meta",
          "script", "style", "title"),
         startTagProcessInHead),
        ("body", startTagBody),
        ("frameset", startTagFrameset),
        (("address", "article", "aside", "blockquote", "center", "details",
          "dir", "div", "dl", "fieldset", "figcaption", "figure",
          "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
          "section", "summary", "ul"),
         startTagCloseP),
        (headingElements, startTagHeading),
        (("pre", "listing"), startTagPreListing),
        ("form", startTagForm),
        (("li", "dd", "dt"), startTagListItem),
        ("plaintext", startTagPlaintext),
        ("a", startTagA),
        (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
          "strong", "tt", "u"), startTagFormatting),
        ("nobr", startTagNobr),
        ("button", startTagButton),
        (("applet", "marquee", "object"), startTagAppletMarqueeObject),
        ("xmp", startTagXmp),
        ("table", startTagTable),
        (("area", "br", "embed", "img", "keygen", "wbr"),
         startTagVoidFormatting),
        (("param", "source", "track"), startTagParamSource),
        ("input", startTagInput),
        ("hr", startTagHr),
        ("image", startTagImage),
        ("isindex", startTagIsIndex),
        ("textarea", startTagTextarea),
        ("iframe", startTagIFrame),
        ("noscript", startTagNoscript),
        (("noembed", "noframes"), startTagRawtext),
        ("select", startTagSelect),
        (("rp", "rt"), startTagRpRt),
        (("option", "optgroup"), startTagOpt),
        ("math", startTagMath),
        ("svg", startTagSvg),
        (("caption", "col", "colgroup", "frame", "head",
          "tbody", "td", "tfoot", "th", "thead",
          "tr"), startTagMisplaced)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("body", endTagBody),
        ("html", endTagHtml),
        (("address", "article", "aside", "blockquote", "button", "center",
          "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
          "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
          "section", "summary", "ul"), endTagBlock),
        ("form", endTagForm),
        ("p", endTagP),
        (("dd", "dt", "li"), endTagListItem),
        (headingElements, endTagHeading),
        (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
          "strike", "strong", "tt", "u"), endTagFormatting),
        (("applet", "marquee", "object"), endTagAppletMarqueeObject),
        ("br", endTagBr),
    ])
    endTagHandler.default = endTagOther

1645 

1646 

class TextPhase(Phase):
    """Phase used while the content of an RCDATA/RAWTEXT element is read.

    Characters are inserted as text; the end tag (or EOF) pops the current
    element and restores ``parser.originalPhase``.
    """
    __slots__ = tuple()

    def processCharacters(self, token):
        """Insert the character data as text."""
        self.tree.insertText(token["data"])

    def processEOF(self):
        """Report the unclosed element, pop it and restore the original
        phase.

        :returns: True
        """
        self.parser.parseError("expected-named-closing-tag-but-got-eof",
                               {"name": self.tree.openElements[-1].name})
        self.tree.openElements.pop()
        self.parser.phase = self.parser.originalPhase
        return True

    def startTagOther(self, token):
        """Reject any start tag: none is expected in this phase.

        :raises AssertionError: always. The exception is raised explicitly
            rather than through ``assert False`` so that the check is not
            stripped when Python runs with ``-O``.
        """
        raise AssertionError(
            "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name'])

    def endTagScript(self, token):
        """Pop the script element and restore the original phase."""
        node = self.tree.openElements.pop()
        assert node.name == "script"
        self.parser.phase = self.parser.originalPhase
        # The rest of this method is all stuff that only happens if
        # document.write works

    def endTagOther(self, token):
        """Pop the current element and restore the original phase."""
        self.tree.openElements.pop()
        self.parser.phase = self.parser.originalPhase

    startTagHandler = _utils.MethodDispatcher([])
    startTagHandler.default = startTagOther
    endTagHandler = _utils.MethodDispatcher([
        ("script", endTagScript)])
    endTagHandler.default = endTagOther

1679 

1680 

1681class InTablePhase(Phase): 

1682 # http://www.whatwg.org/specs/web-apps/current-work/#in-table 

1683 __slots__ = tuple() 

1684 

1685 # helper methods 

def clearStackToTableContext(self):
    """Clear the stack of open elements back to a table context.

    Elements are popped until the current node is named "table" or
    "html". No parse error is reported for the popped elements.
    """
    stack = self.tree.openElements
    while stack[-1].name not in ("table", "html"):
        stack.pop()
    # When the current node is <html> it's an innerHTML case

1693 

1694 # processing methods 

def processEOF(self):
    """Handle end of file while in a table.

    A parse error is reported unless the current node is ``html``; in
    that case the parser is asserted to be in innerHTML mode.
    """
    if self.tree.openElements[-1].name == "html":
        assert self.parser.innerHTML
    else:
        self.parser.parseError("eof-in-table")
    # Stop parsing

1701 

def processSpaceCharacters(self, token):
    """Switch to the "inTableText" phase and let it process the token.

    The phase that was current on entry is stored on the new phase as
    ``originalPhase``.
    """
    parser = self.parser
    previousPhase = parser.phase
    parser.phase = parser.phases["inTableText"]
    parser.phase.originalPhase = previousPhase
    parser.phase.processSpaceCharacters(token)

1707 

def processCharacters(self, token):
    """Switch to the "inTableText" phase and let it process the token.

    The phase that was current on entry is stored on the new phase as
    ``originalPhase``.
    """
    parser = self.parser
    previousPhase = parser.phase
    parser.phase = parser.phases["inTableText"]
    parser.phase.originalPhase = previousPhase
    parser.phase.processCharacters(token)

1713 

1714 def insertText(self, token): 

1715 # If we get here there must be at least one non-whitespace character 

1716 # Do the table magic! 

1717 self.tree.insertFromTable = True 

1718 self.parser.phases["inBody"].processCharacters(token) 

1719 self.tree.insertFromTable = False 

1720 

1721 def startTagCaption(self, token): 

1722 self.clearStackToTableContext() 

1723 self.tree.activeFormattingElements.append(Marker) 

1724 self.tree.insertElement(token) 

1725 self.parser.phase = self.parser.phases["inCaption"] 

1726 

1727 def startTagColgroup(self, token): 

1728 self.clearStackToTableContext() 

1729 self.tree.insertElement(token) 

1730 self.parser.phase = self.parser.phases["inColumnGroup"] 

1731 

1732 def startTagCol(self, token): 

1733 self.startTagColgroup(impliedTagToken("colgroup", "StartTag")) 

1734 return token 

1735 

1736 def startTagRowGroup(self, token): 

1737 self.clearStackToTableContext() 

1738 self.tree.insertElement(token) 

1739 self.parser.phase = self.parser.phases["inTableBody"] 

1740 

1741 def startTagImplyTbody(self, token): 

1742 self.startTagRowGroup(impliedTagToken("tbody", "StartTag")) 

1743 return token 

1744 

1745 def startTagTable(self, token): 

1746 self.parser.parseError("unexpected-start-tag-implies-end-tag", 

1747 {"startName": "table", "endName": "table"}) 

1748 self.parser.phase.processEndTag(impliedTagToken("table")) 

1749 if not self.parser.innerHTML: 

1750 return token 

1751 

1752 def startTagStyleScript(self, token): 

1753 return self.parser.phases["inHead"].processStartTag(token) 

1754 

1755 def startTagInput(self, token): 

1756 if ("type" in token["data"] and 

1757 token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): 

1758 self.parser.parseError("unexpected-hidden-input-in-table") 

1759 self.tree.insertElement(token) 

1760 # XXX associate with form 

1761 self.tree.openElements.pop() 

1762 else: 

1763 self.startTagOther(token) 

1764 

1765 def startTagForm(self, token): 

1766 self.parser.parseError("unexpected-form-in-table") 

1767 if self.tree.formPointer is None: 

1768 self.tree.insertElement(token) 

1769 self.tree.formPointer = self.tree.openElements[-1] 

1770 self.tree.openElements.pop() 

1771 

1772 def startTagOther(self, token): 

1773 self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]}) 

1774 # Do the table magic! 

1775 self.tree.insertFromTable = True 

1776 self.parser.phases["inBody"].processStartTag(token) 

1777 self.tree.insertFromTable = False 

1778 

1779 def endTagTable(self, token): 

1780 if self.tree.elementInScope("table", variant="table"): 

1781 self.tree.generateImpliedEndTags() 

1782 if self.tree.openElements[-1].name != "table": 

1783 self.parser.parseError("end-tag-too-early-named", 

1784 {"gotName": "table", 

1785 "expectedName": self.tree.openElements[-1].name}) 

1786 while self.tree.openElements[-1].name != "table": 

1787 self.tree.openElements.pop() 

1788 self.tree.openElements.pop() 

1789 self.parser.resetInsertionMode() 

1790 else: 

1791 # innerHTML case 

1792 assert self.parser.innerHTML 

1793 self.parser.parseError() 

1794 

1795 def endTagIgnore(self, token): 

1796 self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) 

1797 

1798 def endTagOther(self, token): 

1799 self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]}) 

1800 # Do the table magic! 

1801 self.tree.insertFromTable = True 

1802 self.parser.phases["inBody"].processEndTag(token) 

1803 self.tree.insertFromTable = False 

1804 

1805 startTagHandler = _utils.MethodDispatcher([ 

1806 ("html", Phase.startTagHtml), 

1807 ("caption", startTagCaption), 

1808 ("colgroup", startTagColgroup), 

1809 ("col", startTagCol), 

1810 (("tbody", "tfoot", "thead"), startTagRowGroup), 

1811 (("td", "th", "tr"), startTagImplyTbody), 

1812 ("table", startTagTable), 

1813 (("style", "script"), startTagStyleScript), 

1814 ("input", startTagInput), 

1815 ("form", startTagForm) 

1816 ]) 

1817 startTagHandler.default = startTagOther 

1818 

1819 endTagHandler = _utils.MethodDispatcher([ 

1820 ("table", endTagTable), 

1821 (("body", "caption", "col", "colgroup", "html", "tbody", "td", 

1822 "tfoot", "th", "thead", "tr"), endTagIgnore) 

1823 ]) 

1824 endTagHandler.default = endTagOther 

1825 

1826 

class InTableTextPhase(Phase):
    """Buffer character tokens seen in a table until a non-text token arrives.

    ``originalPhase`` is the phase to switch back to, and ``characterTokens``
    holds the pending character tokens.
    """
    __slots__ = ("originalPhase", "characterTokens")

    def __init__(self, *args, **kwargs):
        super(InTableTextPhase, self).__init__(*args, **kwargs)
        self.characterTokens = []
        self.originalPhase = None

    def flushCharacters(self):
        """Emit the buffered text and empty the buffer.

        Text with any non-space character goes through the "inTable"
        phase's insertText; non-empty all-space text is inserted directly.
        """
        text = "".join(pending["data"] for pending in self.characterTokens)
        hasNonSpace = any(char not in spaceCharacters for char in text)
        if hasNonSpace:
            merged = {"type": tokenTypes["Characters"], "data": text}
            self.parser.phases["inTable"].insertText(merged)
        elif text:
            self.tree.insertText(text)
        self.characterTokens = []

    def processComment(self, token):
        """Flush buffered text, restore the phase and hand the token back."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEOF(self):
        """Flush buffered text, restore the phase and return True."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return True

    def processCharacters(self, token):
        """Buffer the token, dropping a lone U+0000 character."""
        if token["data"] != "\u0000":
            self.characterTokens.append(token)

    def processSpaceCharacters(self, token):
        """Buffer a whitespace token."""
        # pretty sure we should never reach here
        self.characterTokens.append(token)
        # (an "assert False" used to be considered here)

    def processStartTag(self, token):
        """Flush buffered text, restore the phase and hand the token back."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEndTag(self, token):
        """Flush buffered text, restore the phase and hand the token back."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

1873 

1874 

class InCaptionPhase(Phase):
    """Token handlers for the "in caption" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-caption
    """
    __slots__ = tuple()

    def ignoreEndTagCaption(self):
        """Return True when no <caption> is in table scope."""
        captionOpen = self.tree.elementInScope("caption", variant="table")
        return not captionOpen

    def processEOF(self):
        """Delegate EOF handling to the "inBody" phase."""
        self.parser.phases["inBody"].processEOF()

    def processCharacters(self, token):
        """Delegate character tokens to the "inBody" phase."""
        return self.parser.phases["inBody"].processCharacters(token)

    def startTagTableElement(self, token):
        """Imply </caption>; hand the token back unless that was ignored."""
        self.parser.parseError()
        # XXX Have to duplicate logic here to find out if the tag is ignored
        captionIgnored = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        return None if captionIgnored else token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inBody" phase."""
        return self.parser.phases["inBody"].processStartTag(token)

    def endTagCaption(self, token):
        """Close the open <caption> and go back to the "inTable" phase."""
        if self.ignoreEndTagCaption():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        # AT this code is quite similar to endTagTable in "InTable"
        self.tree.generateImpliedEndTags()
        currentName = self.tree.openElements[-1].name
        if currentName != "caption":
            self.parser.parseError("expected-one-end-tag-but-got-another",
                                   {"gotName": "caption",
                                    "expectedName": currentName})
        # Drop everything above the caption, then the caption itself
        while self.tree.openElements[-1].name != "caption":
            self.tree.openElements.pop()
        self.tree.openElements.pop()
        self.tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Imply </caption>; hand the token back unless that was ignored."""
        self.parser.parseError()
        captionIgnored = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        return None if captionIgnored else token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagOther(self, token):
        """Delegate any other end tag to the "inBody" phase."""
        return self.parser.phases["inBody"].processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
          "thead", "tr"), startTagTableElement)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("caption", endTagCaption),
        ("table", endTagTable),
        (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
          "thead", "tr"), endTagIgnore)
    ])
    endTagHandler.default = endTagOther

1944 

1945 

class InColumnGroupPhase(Phase):
    """Token handlers for the "in column group" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-column
    """
    __slots__ = tuple()

    def ignoreEndTagColgroup(self):
        """Return True when the current node is <html>."""
        currentNode = self.tree.openElements[-1]
        return currentNode.name == "html"

    def processEOF(self):
        """Imply </colgroup> at EOF; return True unless it was ignored."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
            return None
        colgroupIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if colgroupIgnored else True

    def processCharacters(self, token):
        """Imply </colgroup>; hand the token back unless that was ignored."""
        colgroupIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if colgroupIgnored else token

    def startTagCol(self, token):
        """Insert a <col>, pop it at once and acknowledge self-closing."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagOther(self, token):
        """Imply </colgroup>; hand the token back unless that was ignored."""
        colgroupIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if colgroupIgnored else token

    def endTagColgroup(self, token):
        """Pop the <colgroup> and go back to the "inTable" phase."""
        if not self.ignoreEndTagColgroup():
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases["inTable"]
            return
        # innerHTML case
        assert self.parser.innerHTML
        self.parser.parseError()

    def endTagCol(self, token):
        """Report a stray </col>."""
        self.parser.parseError("no-end-tag", {"name": "col"})

    def endTagOther(self, token):
        """Imply </colgroup>; hand the token back unless that was ignored."""
        colgroupIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if colgroupIgnored else token

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("col", startTagCol)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("colgroup", endTagColgroup),
        ("col", endTagCol)
    ])
    endTagHandler.default = endTagOther

2009 

2010 

class InTableBodyPhase(Phase):
    """Token handlers for the "in table body" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-table0
    """
    __slots__ = tuple()

    # helper methods
    def clearStackToTableBodyContext(self):
        """Pop open elements until a row group or <html> is the current node."""
        openElements = self.tree.openElements
        while openElements[-1].name not in ("tbody", "tfoot",
                                            "thead", "html"):
            openElements.pop()
        if openElements[-1].name == "html":
            assert self.parser.innerHTML

    # the rest
    def processEOF(self):
        """Delegate EOF handling to the "inTable" phase."""
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        """Delegate whitespace tokens to the "inTable" phase."""
        return self.parser.phases["inTable"].processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate character tokens to the "inTable" phase."""
        return self.parser.phases["inTable"].processCharacters(token)

    def startTagTr(self, token):
        """Open a <tr> and move to the "inRow" phase."""
        self.clearStackToTableBodyContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inRow"]

    def startTagTableCell(self, token):
        """Imply a <tr> start tag for a stray cell, then hand the token back."""
        self.parser.parseError("unexpected-cell-in-table-body",
                               {"name": token["name"]})
        impliedRow = impliedTagToken("tr", "StartTag")
        self.startTagTr(impliedRow)
        return token

    def startTagTableOther(self, token):
        """Close the open row group and hand the token back."""
        # XXX AT Any ideas on how to share this with endTagTable?
        rowGroupOpen = any(self.tree.elementInScope(name, variant="table")
                           for name in ("tbody", "thead", "tfoot"))
        if not rowGroupOpen:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inTable" phase."""
        return self.parser.phases["inTable"].processStartTag(token)

    def endTagTableRowGroup(self, token):
        """Close the named row group and go back to the "inTable" phase."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": token["name"]})
            return
        self.clearStackToTableBodyContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Close the open row group and hand the </table> token back."""
        rowGroupOpen = any(self.tree.elementInScope(name, variant="table")
                           for name in ("tbody", "thead", "tfoot"))
        if not rowGroupOpen:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        self.parser.parseError("unexpected-end-tag-in-table-body",
                               {"name": token["name"]})

    def endTagOther(self, token):
        """Delegate any other end tag to the "inTable" phase."""
        return self.parser.phases["inTable"].processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("tr", startTagTr),
        (("td", "th"), startTagTableCell),
        (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
         startTagTableOther)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
        ("table", endTagTable),
        (("body", "caption", "col", "colgroup", "html", "td", "th",
          "tr"), endTagIgnore)
    ])
    endTagHandler.default = endTagOther

2108 

2109 

class InRowPhase(Phase):
    """Token handlers for the "in row" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-row
    """
    __slots__ = tuple()

    # helper methods (XXX unify this with other table helper methods)
    def clearStackToTableRowContext(self):
        """Pop open elements, reporting each, until <tr> or <html> is current."""
        openElements = self.tree.openElements
        while openElements[-1].name not in ("tr", "html"):
            self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                   {"name": openElements[-1].name})
            openElements.pop()

    def ignoreEndTagTr(self):
        """Return True when no <tr> is in table scope."""
        rowOpen = self.tree.elementInScope("tr", variant="table")
        return not rowOpen

    # the rest
    def processEOF(self):
        """Delegate EOF handling to the "inTable" phase."""
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        """Delegate whitespace tokens to the "inTable" phase."""
        return self.parser.phases["inTable"].processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate character tokens to the "inTable" phase."""
        return self.parser.phases["inTable"].processCharacters(token)

    def startTagTableCell(self, token):
        """Open a cell, move to the "inCell" phase and push a Marker."""
        self.clearStackToTableRowContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCell"]
        self.tree.activeFormattingElements.append(Marker)

    def startTagTableOther(self, token):
        """Imply </tr>; hand the token back unless that was ignored."""
        rowIgnored = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # XXX how are we sure it's always ignored in the innerHTML case?
        return None if rowIgnored else token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inTable" phase."""
        return self.parser.phases["inTable"].processStartTag(token)

    def endTagTr(self, token):
        """Close the open <tr> and go back to the "inTableBody" phase."""
        if self.ignoreEndTagTr():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        self.clearStackToTableRowContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTableBody"]

    def endTagTable(self, token):
        """Imply </tr>; hand the </table> token back unless that was ignored."""
        rowIgnored = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # Reprocess the current tag if the tr end tag was not ignored
        # XXX how are we sure it's always ignored in the innerHTML case?
        return None if rowIgnored else token

    def endTagTableRowGroup(self, token):
        """Imply </tr> and hand the token back if the row group is in scope."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError()
            return None
        self.endTagTr(impliedTagToken("tr"))
        return token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        self.parser.parseError("unexpected-end-tag-in-table-row",
                               {"name": token["name"]})

    def endTagOther(self, token):
        """Delegate any other end tag to the "inTable" phase."""
        return self.parser.phases["inTable"].processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("td", "th"), startTagTableCell),
        (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
          "tr"), startTagTableOther)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("tr", endTagTr),
        ("table", endTagTable),
        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
        (("body", "caption", "col", "colgroup", "html", "td", "th"),
         endTagIgnore)
    ])
    endTagHandler.default = endTagOther

2198 

2199 

class InCellPhase(Phase):
    """Token handlers for the "in cell" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-cell
    """
    __slots__ = tuple()

    # helper
    def closeCell(self):
        """Close the open <td>, or failing that the open <th>, if in scope."""
        tree = self.tree
        if tree.elementInScope("td", variant="table"):
            self.endTagTableCell(impliedTagToken("td"))
        elif tree.elementInScope("th", variant="table"):
            self.endTagTableCell(impliedTagToken("th"))

    # the rest
    def processEOF(self):
        """Delegate EOF handling to the "inBody" phase."""
        self.parser.phases["inBody"].processEOF()

    def processCharacters(self, token):
        """Delegate character tokens to the "inBody" phase."""
        return self.parser.phases["inBody"].processCharacters(token)

    def startTagTableOther(self, token):
        """Close the current cell and hand the token back."""
        cellOpen = (self.tree.elementInScope("td", variant="table") or
                    self.tree.elementInScope("th", variant="table"))
        if not cellOpen:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inBody" phase."""
        return self.parser.phases["inBody"].processStartTag(token)

    def endTagTableCell(self, token):
        """Close the named cell and go back to the "inRow" phase."""
        cellName = token["name"]
        if not self.tree.elementInScope(cellName, variant="table"):
            self.parser.parseError("unexpected-end-tag", {"name": cellName})
            return
        self.tree.generateImpliedEndTags(cellName)
        if self.tree.openElements[-1].name == cellName:
            self.tree.openElements.pop()
        else:
            self.parser.parseError("unexpected-cell-end-tag",
                                   {"name": cellName})
            # Pop up to and including the cell being closed
            while self.tree.openElements.pop().name != cellName:
                pass
        self.tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inRow"]

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagImply(self, token):
        """Close the cell and hand the token back if its element is in scope."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            # sometimes innerHTML case
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def endTagOther(self, token):
        """Delegate any other end tag to the "inBody" phase."""
        return self.parser.phases["inBody"].processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
          "thead", "tr"), startTagTableOther)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("td", "th"), endTagTableCell),
        (("body", "caption", "col", "colgroup", "html"), endTagIgnore),
        (("table", "tbody", "tfoot", "thead", "tr"), endTagImply)
    ])
    endTagHandler.default = endTagOther

2275 

2276 

class InSelectPhase(Phase):
    """Token handlers for the "in select" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-select
    """
    __slots__ = tuple()

    def processEOF(self):
        """Report EOF inside a select unless only <html> is left open."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-select")

    def processCharacters(self, token):
        """Insert the text, dropping a lone U+0000 character."""
        text = token["data"]
        if text != "\u0000":
            self.tree.insertText(text)

    def startTagOption(self, token):
        """Insert an <option>, closing an already-open one first."""
        # We need to imply </option> if <option> is the current node.
        openElements = self.tree.openElements
        if openElements[-1].name == "option":
            openElements.pop()
        self.tree.insertElement(token)

    def startTagOptgroup(self, token):
        """Insert an <optgroup>, closing an open <option>/<optgroup> first."""
        openElements = self.tree.openElements
        for impliedlyClosed in ("option", "optgroup"):
            if openElements[-1].name == impliedlyClosed:
                openElements.pop()
        self.tree.insertElement(token)

    def startTagSelect(self, token):
        """Treat a nested <select> start tag as an implied </select>."""
        self.parser.parseError("unexpected-select-in-select")
        self.endTagSelect(impliedTagToken("select"))

    def startTagInput(self, token):
        """Imply </select> and hand the token back if a select is in scope."""
        self.parser.parseError("unexpected-input-in-select")
        if not self.tree.elementInScope("select", variant="select"):
            assert self.parser.innerHTML
            return None
        self.endTagSelect(impliedTagToken("select"))
        return token

    def startTagScript(self, token):
        """Delegate <script> to the "inHead" phase."""
        return self.parser.phases["inHead"].processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as unexpected and ignore it."""
        self.parser.parseError("unexpected-start-tag-in-select",
                               {"name": token["name"]})

    def endTagOption(self, token):
        """Pop the current <option>, or report a stray </option>."""
        openElements = self.tree.openElements
        if openElements[-1].name != "option":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "option"})
            return
        openElements.pop()

    def endTagOptgroup(self, token):
        """Pop the current <optgroup>, with any <option> directly inside it."""
        openElements = self.tree.openElements
        # </optgroup> implicitly closes <option>
        if (openElements[-1].name == "option" and
                openElements[-2].name == "optgroup"):
            openElements.pop()
        # It also closes </optgroup>
        if openElements[-1].name == "optgroup":
            openElements.pop()
        # But nothing else
        else:
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "optgroup"})

    def endTagSelect(self, token):
        """Pop through the open <select> and reset the insertion mode."""
        if not self.tree.elementInScope("select", variant="select"):
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        # Pop up to and including the select element
        while self.tree.openElements.pop().name != "select":
            pass
        self.parser.resetInsertionMode()

    def endTagOther(self, token):
        """Report any other end tag as unexpected and ignore it."""
        self.parser.parseError("unexpected-end-tag-in-select",
                               {"name": token["name"]})

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("option", startTagOption),
        ("optgroup", startTagOptgroup),
        ("select", startTagSelect),
        (("input", "keygen", "textarea"), startTagInput),
        ("script", startTagScript)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("option", endTagOption),
        ("optgroup", endTagOptgroup),
        ("select", endTagSelect)
    ])
    endTagHandler.default = endTagOther

2375 

2376 

class InSelectInTablePhase(Phase):
    """Token handlers for a select inside a table.

    Table-related tags close the select; everything else is delegated to
    the "inSelect" phase.
    """
    __slots__ = tuple()

    def processEOF(self):
        """Delegate EOF handling to the "inSelect" phase."""
        self.parser.phases["inSelect"].processEOF()

    def processCharacters(self, token):
        """Delegate character tokens to the "inSelect" phase."""
        return self.parser.phases["inSelect"].processCharacters(token)

    def startTagTable(self, token):
        """Imply </select> for a table start tag, then hand the token back."""
        self.parser.parseError(
            "unexpected-table-element-start-tag-in-select-in-table",
            {"name": token["name"]})
        self.endTagOther(impliedTagToken("select"))
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inSelect" phase."""
        return self.parser.phases["inSelect"].processStartTag(token)

    def endTagTable(self, token):
        """Imply </select> and hand the token back if its element is in scope."""
        self.parser.parseError(
            "unexpected-table-element-end-tag-in-select-in-table",
            {"name": token["name"]})
        if not self.tree.elementInScope(token["name"], variant="table"):
            return None
        self.endTagOther(impliedTagToken("select"))
        return token

    def endTagOther(self, token):
        """Delegate any other end tag to the "inSelect" phase."""
        return self.parser.phases["inSelect"].processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
         startTagTable)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
         endTagTable)
    ])
    endTagHandler.default = endTagOther

2414 

2415 

class InForeignContentPhase(Phase):
    """Token handlers used while the current node is in a foreign namespace.

    The handlers below compare namespaces against ``namespaces["mathml"]``
    and ``namespaces["svg"]``.
    """
    __slots__ = tuple()

    # Start tags that break out of foreign content back into HTML parsing.
    breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                  "center", "code", "dd", "div", "dl", "dt",
                                  "em", "embed", "h1", "h2", "h3",
                                  "h4", "h5", "h6", "head", "hr", "i", "img",
                                  "li", "listing", "menu", "meta", "nobr",
                                  "ol", "p", "pre", "ruby", "s", "small",
                                  "span", "strong", "strike", "sub", "sup",
                                  "table", "tt", "u", "ul", "var"])

    # <font> only breaks out when it carries one of these attributes.
    fontBreakoutAttributes = frozenset(["color", "face", "size"])

    # Lower-cased SVG tag name -> mixed-case replacement. Defined once on
    # the class so it is not rebuilt for every SVG start tag.
    svgTagNameReplacements = {
        "altglyph": "altGlyph",
        "altglyphdef": "altGlyphDef",
        "altglyphitem": "altGlyphItem",
        "animatecolor": "animateColor",
        "animatemotion": "animateMotion",
        "animatetransform": "animateTransform",
        "clippath": "clipPath",
        "feblend": "feBlend",
        "fecolormatrix": "feColorMatrix",
        "fecomponenttransfer": "feComponentTransfer",
        "fecomposite": "feComposite",
        "feconvolvematrix": "feConvolveMatrix",
        "fediffuselighting": "feDiffuseLighting",
        "fedisplacementmap": "feDisplacementMap",
        "fedistantlight": "feDistantLight",
        "feflood": "feFlood",
        "fefunca": "feFuncA",
        "fefuncb": "feFuncB",
        "fefuncg": "feFuncG",
        "fefuncr": "feFuncR",
        "fegaussianblur": "feGaussianBlur",
        "feimage": "feImage",
        "femerge": "feMerge",
        "femergenode": "feMergeNode",
        "femorphology": "feMorphology",
        "feoffset": "feOffset",
        "fepointlight": "fePointLight",
        "fespecularlighting": "feSpecularLighting",
        "fespotlight": "feSpotLight",
        "fetile": "feTile",
        "feturbulence": "feTurbulence",
        "foreignobject": "foreignObject",
        "glyphref": "glyphRef",
        "lineargradient": "linearGradient",
        "radialgradient": "radialGradient",
        "textpath": "textPath",
    }

    def adjustSVGTagNames(self, token):
        """Rewrite ``token["name"]`` in place to its mixed-case SVG spelling.

        Names that are not in the replacement table are left untouched.
        """
        replacements = self.svgTagNameReplacements
        if token["name"] in replacements:
            token["name"] = replacements[token["name"]]

    def processCharacters(self, token):
        """Insert text, replacing a lone U+0000 with U+FFFD.

        Any other text with a non-space character clears ``framesetOK``.
        """
        if token["data"] == "\u0000":
            token["data"] = "\uFFFD"
        elif (self.parser.framesetOK and
              any(char not in spaceCharacters for char in token["data"])):
            self.parser.framesetOK = False
        Phase.processCharacters(self, token)

    def processStartTag(self, token):
        """Insert a foreign element, or break out and hand the token back.

        For a breakout element (or a ``<font>`` with a color/face/size
        attribute) foreign elements are popped until the current node is in
        the default namespace or is an integration point, and the token is
        returned. Otherwise the token is adjusted for the current node's
        namespace and inserted, and nothing is returned.
        """
        currentNode = self.tree.openElements[-1]
        # token["data"] is iterated for its keys (the attribute names)
        isBreakout = (
            token["name"] in self.breakoutElements or
            (token["name"] == "font" and
             not self.fontBreakoutAttributes.isdisjoint(token["data"])))
        if isBreakout:
            self.parser.parseError("unexpected-html-element-in-foreign-content",
                                   {"name": token["name"]})
            while (self.tree.openElements[-1].namespace !=
                   self.tree.defaultNamespace and
                   not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                   not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                self.tree.openElements.pop()
            return token

        if currentNode.namespace == namespaces["mathml"]:
            self.parser.adjustMathMLAttributes(token)
        elif currentNode.namespace == namespaces["svg"]:
            self.adjustSVGTagNames(token)
            self.parser.adjustSVGAttributes(token)
        self.parser.adjustForeignAttributes(token)
        # The new element inherits the namespace of the current node
        token["namespace"] = currentNode.namespace
        self.tree.insertElement(token)
        if token["selfClosing"]:
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

    def processEndTag(self, token):
        """Close the matching foreign element, or defer to the current phase.

        The open-elements stack is walked from the top. If a node whose
        lower-cased name equals the token name is found first, the stack is
        popped through that node and None is returned. If a node in the
        default namespace is reached first, the token is passed to
        ``self.parser.phase.processEndTag`` and its result is returned.
        """
        nodeIndex = len(self.tree.openElements) - 1
        node = self.tree.openElements[-1]
        if node.name.translate(asciiUpper2Lower) != token["name"]:
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        while True:
            if node.name.translate(asciiUpper2Lower) == token["name"]:
                # XXX this isn't in the spec but it seems necessary
                if self.parser.phase == self.parser.phases["inTableText"]:
                    self.parser.phase.flushCharacters()
                    self.parser.phase = self.parser.phase.originalPhase
                # Pop up to and including the matched node
                while self.tree.openElements.pop() != node:
                    assert self.tree.openElements
                new_token = None
                break
            nodeIndex -= 1

            node = self.tree.openElements[nodeIndex]
            if node.namespace != self.tree.defaultNamespace:
                continue
            else:
                new_token = self.parser.phase.processEndTag(token)
                break
        return new_token

2529 

2530 

class AfterBodyPhase(Phase):
    """Token handlers for the phase entered after the body.

    Most tokens are parse errors that switch back to the "inBody" phase
    and hand the token back.
    """
    __slots__ = tuple()

    def processEOF(self):
        """Do nothing; parsing stops here."""
        # Stop parsing
        return None

    def processComment(self, token):
        """Insert the comment under the first open element."""
        # This is needed because data is to be appended to the <html> element
        # here and not to whatever is currently open.
        rootElement = self.tree.openElements[0]
        self.tree.insertComment(token, rootElement)

    def processCharacters(self, token):
        """Report stray text, switch to "inBody" and hand the token back."""
        parser = self.parser
        parser.parseError("unexpected-char-after-body")
        parser.phase = parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Delegate <html> to the "inBody" phase."""
        return self.parser.phases["inBody"].processStartTag(token)

    def startTagOther(self, token):
        """Report a stray start tag, switch to "inBody" and hand it back."""
        parser = self.parser
        parser.parseError("unexpected-start-tag-after-body",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token

    def endTagHtml(self, name):
        """Handle </html>: an error in innerHTML mode, else "afterAfterBody".

        :arg name: the argument passed by the end-tag dispatcher (unused)
        """
        parser = self.parser
        if not parser.innerHTML:
            parser.phase = parser.phases["afterAfterBody"]
        else:
            parser.parseError("unexpected-end-tag-after-body-innerhtml")

    def endTagOther(self, token):
        """Report a stray end tag, switch to "inBody" and hand it back."""
        parser = self.parser
        parser.parseError("unexpected-end-tag-after-body",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
    endTagHandler.default = endTagOther

2576 

2577 

class InFramesetPhase(Phase):
    """Token handlers for the "in frameset" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
    """
    __slots__ = tuple()

    def processEOF(self):
        """Report EOF inside a frameset unless only <html> is left open."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-frameset")

    def processCharacters(self, token):
        """Report character data as unexpected and ignore it."""
        self.parser.parseError("unexpected-char-in-frameset")

    def startTagFrameset(self, token):
        """Insert a nested <frameset>."""
        self.tree.insertElement(token)

    def startTagFrame(self, token):
        """Insert a <frame> and pop it straight away."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()

    def startTagNoframes(self, token):
        """Delegate <noframes> to the "inBody" phase."""
        return self.parser.phases["inBody"].processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as unexpected and ignore it."""
        self.parser.parseError("unexpected-start-tag-in-frameset",
                               {"name": token["name"]})

    def endTagFrameset(self, token):
        """Pop the current frameset and maybe switch to "afterFrameset"."""
        openElements = self.tree.openElements
        if openElements[-1].name != "html":
            openElements.pop()
        else:
            # innerHTML case
            self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
        leftFramesets = openElements[-1].name != "frameset"
        if not self.parser.innerHTML and leftFramesets:
            # If we're not in innerHTML mode and the current node is not a
            # "frameset" element (anymore) then switch.
            self.parser.phase = self.parser.phases["afterFrameset"]

    def endTagOther(self, token):
        """Report any other end tag as unexpected and ignore it."""
        self.parser.parseError("unexpected-end-tag-in-frameset",
                               {"name": token["name"]})

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("frameset", startTagFrameset),
        ("frame", startTagFrame),
        ("noframes", startTagNoframes)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("frameset", endTagFrameset)
    ])
    endTagHandler.default = endTagOther

2633 

2634 

class AfterFramesetPhase(Phase):
    """Token handlers for the "afterFrameset" phase.

    See http://www.whatwg.org/specs/web-apps/current-work/#after3
    """
    __slots__ = ()

    def processEOF(self):
        """Handle end-of-file: nothing is left to do, so parsing stops."""
        return None

    def processCharacters(self, token):
        """Character data is reported as a parse error and otherwise dropped."""
        self.parser.parseError("unexpected-char-after-frameset")

    def startTagNoframes(self, token):
        """Delegate <noframes> to the "inHead" phase and return its result."""
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagOther(self, token):
        """Any other start tag is reported as a parse error and ignored."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-after-frameset", details)

    def endTagHtml(self, token):
        """Handle </html> by switching to the "afterAfterFrameset" phase."""
        parser = self.parser
        parser.phase = parser.phases["afterAfterFrameset"]

    def endTagOther(self, token):
        """Any other end tag is reported as a parse error and ignored."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-after-frameset", details)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("noframes", startTagNoframes),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
    endTagHandler.default = endTagOther

2670 

2671 

class AfterAfterBodyPhase(Phase):
    """Token handlers for the "afterAfterBody" phase.

    Comments are attached to the document, whitespace and <html> start tags
    are delegated to "inBody", and any other characters, start tags or end
    tags are reported as errors and reprocessed in the "inBody" phase.
    """
    __slots__ = ()

    def processEOF(self):
        """Handle end-of-file: nothing is left to do."""
        return None

    def processComment(self, token):
        """Insert the comment as a child of the document itself."""
        tree = self.tree
        tree.insertComment(token, tree.document)

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the "inBody" phase and return its result."""
        in_body = self.parser.phases["inBody"]
        return in_body.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report stray characters, switch to "inBody" and hand the token back."""
        parser = self.parser
        parser.parseError("expected-eof-but-got-char")
        parser.phase = parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Delegate an <html> start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagOther(self, token):
        """Report a stray start tag, switch to "inBody" and hand the token back."""
        parser = self.parser
        details = {"name": token["name"]}
        parser.parseError("expected-eof-but-got-start-tag", details)
        parser.phase = parser.phases["inBody"]
        return token

    def processEndTag(self, token):
        """Report a stray end tag, switch to "inBody" and hand the token back."""
        parser = self.parser
        details = {"name": token["name"]}
        parser.parseError("expected-eof-but-got-end-tag", details)
        parser.phase = parser.phases["inBody"]
        return token

    startTagHandler = _utils.MethodDispatcher([("html", startTagHtml)])
    startTagHandler.default = startTagOther

2708 

2709 

class AfterAfterFramesetPhase(Phase):
    """Token handlers for the "afterAfterFrameset" phase.

    Comments are attached to the document; whitespace and <html> start tags
    are delegated to "inBody" and <noframes> to "inHead". Everything else is
    reported as a parse error and ignored, without changing phase.
    """
    __slots__ = ()

    def processEOF(self):
        """Handle end-of-file: nothing is left to do."""
        return None

    def processComment(self, token):
        """Insert the comment as a child of the document itself."""
        tree = self.tree
        tree.insertComment(token, tree.document)

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the "inBody" phase and return its result."""
        in_body = self.parser.phases["inBody"]
        return in_body.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Character data is reported as a parse error and otherwise dropped."""
        self.parser.parseError("expected-eof-but-got-char")

    def startTagHtml(self, token):
        """Delegate an <html> start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagNoFrames(self, token):
        """Delegate <noframes> to the "inHead" phase and return its result."""
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagOther(self, token):
        """Any other start tag is reported as a parse error and ignored."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-start-tag", details)

    def processEndTag(self, token):
        """Every end tag is reported as a parse error and ignored."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-end-tag", details)

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("noframes", startTagNoFrames),
    ])
    startTagHandler.default = startTagOther

2744 

2745# pylint:enable=unused-argument 

2746 

2747 

# Registry mapping each phase name to the Phase subclass that implements it.
_phases = {
    # Before and inside <head>
    "initial": InitialPhase,
    "beforeHtml": BeforeHtmlPhase,
    "beforeHead": BeforeHeadPhase,
    "inHead": InHeadPhase,
    "inHeadNoscript": InHeadNoscriptPhase,
    "afterHead": AfterHeadPhase,
    # Body content
    "inBody": InBodyPhase,
    "text": TextPhase,
    # Tables
    "inTable": InTablePhase,
    "inTableText": InTableTextPhase,
    "inCaption": InCaptionPhase,
    "inColumnGroup": InColumnGroupPhase,
    "inTableBody": InTableBodyPhase,
    "inRow": InRowPhase,
    "inCell": InCellPhase,
    # Select elements
    "inSelect": InSelectPhase,
    "inSelectInTable": InSelectInTablePhase,
    # Foreign content
    "inForeignContent": InForeignContentPhase,
    # After the body, and framesets
    "afterBody": AfterBodyPhase,
    "inFrameset": InFramesetPhase,
    "afterFrameset": AfterFramesetPhase,
    "afterAfterBody": AfterAfterBodyPhase,
    "afterAfterFrameset": AfterAfterFramesetPhase,
}

2774 

2775 

def adjust_attributes(token, replacements):
    """Rename the attribute keys of a token according to ``replacements``.

    :arg token: token dict whose ``'data'`` entry is a mapping of attributes

    :arg replacements: mapping from an attribute name to its replacement key

    ``token['data']`` is rebuilt (as the same mapping type, with keys found
    in ``replacements`` swapped out) only when at least one attribute name
    is a key of ``replacements``; otherwise the token is left untouched.
    """
    attrs = token['data']
    if not any(name in replacements for name in attrs):
        return
    renamed = ((replacements.get(name, name), value)
               for name, value in attrs.items())
    token['data'] = type(attrs)(renamed)

2781 

2782 

def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    """Build a tag token dict for the given tag name.

    :arg name: tag name stored under ``"name"``

    :arg type: key looked up in ``tokenTypes`` to obtain the ``"type"`` value

    :arg attributes: mapping stored under ``"data"``; a fresh empty dict is
        used when this is ``None``

    :arg selfClosing: value stored under ``"selfClosing"``

    :returns: dict with the keys ``"type"``, ``"name"``, ``"data"`` and
        ``"selfClosing"``
    """
    data = {} if attributes is None else attributes
    token = {"type": tokenTypes[type], "name": name}
    token["data"] = data
    token["selfClosing"] = selfClosing
    return token

2789 

2790 

class ParseError(Exception):
    """Error in parsed document

    Adds nothing of its own beyond what :class:`Exception` provides.
    """