Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bleach/

1from __future__ import absolute_import, division, unicode_literals

2from six import with_metaclass, viewkeys

4import types

6from . import _inputstream

7from . import _tokenizer

9from . import treebuilders

10from .treebuilders.base import Marker

12from . import _utils

13from .constants import (

14 spaceCharacters, asciiUpper2Lower,

15 specialElements, headingElements, cdataElements, rcdataElements,

16 tokenTypes, tagTokenTypes,

17 namespaces,

18 htmlIntegrationPointElements, mathmlTextIntegrationPointElements,

19 adjustForeignAttributes as adjustForeignAttributesMap,

20 adjustMathMLAttributes, adjustSVGAttributes,

21 E,

22 _ReparseException

23)

def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML document as a string or file-like object into a tree

    :arg doc: the document to parse as a string or file-like object

    :arg treebuilder: the treebuilder to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :arg kwargs: passed through unchanged to ``HTMLParser.parse``

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parse
    >>> parse('<html><body>This is a doc</body></html>')
    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

    """
    # Resolve the treebuilder name to a class, then hand everything to a
    # throw-away parser instance.
    builder_class = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder_class, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, **kwargs)

def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML fragment as a string or file-like object into a tree

    :arg doc: the fragment to parse as a string or file-like object

    :arg container: the container context to parse the fragment in

    :arg treebuilder: the treebuilder to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :arg kwargs: passed through unchanged to ``HTMLParser.parseFragment``

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parseFragment
    >>> parseFragment('this is a fragment')
    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

    """
    tb = treebuilders.getTreeBuilder(treebuilder)
    p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
    return p.parseFragment(doc, container=container, **kwargs)

def method_decorator_metaclass(function):
    """Return a metaclass that applies ``function`` to every plain function
    found in the body of a class created with it.

    Attributes that are not ``types.FunctionType`` instances are stored back
    unchanged.
    """
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # Only existing keys are reassigned, so the dict never changes
            # size while it is being iterated.
            for key, member in classDict.items():
                classDict[key] = (function(member)
                                  if isinstance(member, types.FunctionType)
                                  else member)
            return type.__new__(meta, classname, bases, classDict)
    return Decorated

class HTMLParser(object):
    """HTML parser

    Generates a tree structure from a stream of (possibly malformed) HTML.

    """

    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        :arg tree: a treebuilder class controlling the type of tree that will be
            returned. Built in treebuilders can be accessed through
            html5lib.treebuilders.getTreeBuilder(treeType)

        :arg strict: raise an exception when a parse error is encountered

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        :arg debug: whether or not to enable debug mode which logs things

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> from html5lib.treebuilders import getTreeBuilder
        >>> parser = HTMLParser()  # generates parser with etree builder
        >>> parser = HTMLParser(getTreeBuilder('lxml'), strict=True)  # strict parser with lxml builder

        """

        # Raise an exception on the first error encountered
        self.strict = strict

        # ``tree`` is called below, so it has to be a treebuilder class (or
        # other callable), not a treebuilder name.
        builder_class = treebuilders.getTreeBuilder("etree") if tree is None else tree
        self.tree = builder_class(namespaceHTMLElements)
        self.errors = []

        # One phase object per phase name; every phase shares this parser
        # and its tree.
        phase_classes = getPhases(debug)
        self.phases = {name: phase_class(self, self.tree)
                       for name, phase_class in phase_classes.items()}

    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
        """Tokenize ``stream`` and run the main loop over it.

        :arg stream: input handed to ``_tokenizer.HTMLTokenizer``

        :arg innerHTML: stored as ``innerHTMLMode``; selects the fragment
            set-up in :meth:`reset`

        :arg container: name of the fragment's context element

        :arg scripting: stored as ``self.scripting``

        :arg kwargs: passed through to the tokenizer
        """
        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

        try:
            self.mainLoop()
        except _ReparseException:
            # Start over from a clean state and run the loop once more.
            self.reset()
            self.mainLoop()

    def reset(self):
        """Return the parser and its tree to their start-of-parse state."""
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if self.innerHTMLMode:
            self.innerHTML = self.container.lower()

            tokenizer = self.tokenizer
            if self.innerHTML in cdataElements:
                tokenizer.state = tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                tokenizer.state = tokenizer.rawtextState
            elif self.innerHTML == 'plaintext':
                tokenizer.state = tokenizer.plaintextState
            # otherwise the state already is the data state

            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False  # pylint:disable=redefined-variable-type
            self.phase = self.phases["initial"]

        self.lastPhase = None

        self.beforeRCDataPhase = None

        self.framesetOK = True

    @property
    def documentEncoding(self):
        """Name of the character encoding that was used to decode the input stream, or
        :obj:`None` if that is not determined yet

        """
        if hasattr(self, 'tokenizer'):
            return self.tokenizer.stream.charEncoding[0].name
        return None

    def isHTMLIntegrationPoint(self, element):
        """Return whether ``element`` counts as an HTML integration point.

        A MathML ``annotation-xml`` element qualifies only when its
        ``encoding`` attribute, ASCII-lowercased, is ``text/html`` or
        ``application/xhtml+xml``; any other element is looked up in
        ``htmlIntegrationPointElements``.
        """
        is_annotation_xml = (element.name == "annotation-xml" and
                             element.namespace == namespaces["mathml"])
        if not is_annotation_xml:
            return (element.namespace, element.name) in htmlIntegrationPointElements

        if "encoding" not in element.attributes:
            return False
        encoding = element.attributes["encoding"].translate(asciiUpper2Lower)
        return encoding in ("text/html", "application/xhtml+xml")

    def isMathMLTextIntegrationPoint(self, element):
        """Return whether ``element`` is listed in
        ``mathmlTextIntegrationPointElements``."""
        key = (element.namespace, element.name)
        return key in mathmlTextIntegrationPointElements

    def mainLoop(self):
        """Feed every token from the tokenizer to the appropriate phase, then
        run end-of-file processing."""
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

        # Loop invariant, so build it once instead of once per token.
        mathmlGlyphTags = frozenset(["mglyph", "malignmark"])

        for token in self.tokenizer:
            prev_token = None
            new_token = token
            # A phase hands a token back when it has to be reprocessed
            # (typically after the phase has switched ``self.phase``).
            while new_token is not None:
                prev_token = new_token
                openElements = self.tree.openElements
                currentNode = openElements[-1] if openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                token_type = new_token["type"]

                if token_type == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                    continue

                # Decide between the current phase and foreign-content
                # handling. The operands are deliberately evaluated lazily
                # and in this order.
                if (len(openElements) == 0 or
                        currentNodeNamespace == self.tree.defaultNamespace or
                        (self.isMathMLTextIntegrationPoint(currentNode) and
                         ((token_type == StartTagToken and
                           token["name"] not in mathmlGlyphTags) or
                          token_type in (CharactersToken, SpaceCharactersToken))) or
                        (currentNodeNamespace == namespaces["mathml"] and
                         currentNodeName == "annotation-xml" and
                         token_type == StartTagToken and
                         token["name"] == "svg") or
                        (self.isHTMLIntegrationPoint(currentNode) and
                         token_type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                    phase = self.phase
                else:
                    phase = self.phases["inForeignContent"]

                if token_type == CharactersToken:
                    new_token = phase.processCharacters(new_token)
                elif token_type == SpaceCharactersToken:
                    new_token = phase.processSpaceCharacters(new_token)
                elif token_type == StartTagToken:
                    new_token = phase.processStartTag(new_token)
                elif token_type == EndTagToken:
                    new_token = phase.processEndTag(new_token)
                elif token_type == CommentToken:
                    new_token = phase.processComment(new_token)
                elif token_type == DoctypeToken:
                    new_token = phase.processDoctype(new_token)

            # ``prev_token`` is the token that was processed last for this
            # tokenizer item.
            if (token_type == StartTagToken and prev_token["selfClosing"] and
                    not prev_token["selfClosingAcknowledged"]):
                self.parseError("non-void-element-with-trailing-solidus",
                                {"name": prev_token["name"]})

        # When the loop finishes it's EOF
        visited_phases = []
        while True:
            visited_phases.append(self.phase)
            if not self.phase.processEOF():
                break
            # A phase asking for reprocessing must have moved to a phase
            # that has not handled EOF yet.
            assert self.phase not in visited_phases

    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding. If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element).

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parse('<html><body>This is a doc</body></html>')
        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

        """
        # innerHTML=False, container=None: whole-document mode.
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

    def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

        :arg container: name of the element we're setting the innerHTML
            property if set to None, default to 'div'

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding. If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element)

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parseFragment('this is a fragment')
        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

        """
        # innerHTML=True: fragment mode.
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error and, in strict mode, raise it.

        :arg errorcode: key into the ``E`` message table

        :arg datavars: mapping interpolated into the message; defaults to an
            empty dict

        :raises ParseError: when ``self.strict`` is true
        """
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            datavars = {}
        position = self.tokenizer.stream.position()
        self.errors.append((position, errorcode, datavars))
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

    def adjustMathMLAttributes(self, token):
        """Apply the ``adjustMathMLAttributes`` table to ``token``."""
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        """Apply the ``adjustSVGAttributes`` table to ``token``."""
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        """Apply the foreign-attribute adjustment table to ``token``."""
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        # pylint:disable=unused-argument
        # NOTE(review): nothing in this class assigns ``self.parser``, so
        # this looks like unused legacy code that would raise AttributeError
        # if called - confirm before relying on it.
        self.parser.phase()

    def resetInsertionMode(self):
        """Choose ``self.phase`` from the stack of open elements."""
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        phaseForElement = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        # Walk from the most recently opened element towards the root.
        for node in reversed(self.tree.openElements):
            new_phase = None
            nodeName = node.name
            if node == self.tree.openElements[0]:
                # At the root the fragment's context element name is used
                # in place of the node's own name.
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

            # Elements outside the default namespace never pick the phase.
            if not last and node.namespace != self.tree.defaultNamespace:
                continue

            if nodeName in phaseForElement:
                new_phase = self.phases[phaseForElement[nodeName]]
                break
            elif last:
                new_phase = self.phases["inBody"]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        """Insert ``token`` and switch tokenizer and parser to text handling.

        :arg token: start tag token of the element to insert

        :arg contentType: either ``"RAWTEXT"`` or ``"RCDATA"``
        """
        # Generic RCDATA/RAWTEXT Parsing algorithm
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        tokenizer = self.tokenizer
        tokenizer.state = (tokenizer.rawtextState if contentType == "RAWTEXT"
                           else tokenizer.rcdataState)

        # Remember the phase that was active before the switch to "text".
        self.originalPhase = self.phase

        self.phase = self.phases["text"]

394

395

396@_utils.memoize

397def getPhases(debug):

def log(function):
    """Logger that records which phase processes each token"""
    type_names = {value: key for key, value in tokenTypes.items()}

    def wrapped(self, *args, **kwargs):
        # Only ``process*`` methods that were given a token are recorded;
        # every call is forwarded to the wrapped function either way.
        if args and function.__name__.startswith("process"):
            token = args[0]
            token_type = token['type']
            info = {"type": type_names[token_type]}
            if token_type in tagTokenTypes:
                info["name"] = token['name']

            parser = self.parser
            parser.log.append((parser.tokenizer.state.__name__,
                               parser.phase.__class__.__name__,
                               self.__class__.__name__,
                               function.__name__,
                               info))
        return function(self, *args, **kwargs)
    return wrapped

418

def getMetaclass(use_metaclass, metaclass_func):
    """Return the decorating metaclass built from ``metaclass_func`` when
    ``use_metaclass`` is true, and plain ``type`` otherwise."""
    return method_decorator_metaclass(metaclass_func) if use_metaclass else type

424

425 # pylint:disable=unused-argument

class Phase(with_metaclass(getMetaclass(debug, log))):
    """Base class for helper object that implements each phase of processing
    """
    __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")

    def __init__(self, parser, tree):
        self.parser = parser
        self.tree = tree
        # Per-instance caches mapping a tag name to the handler that was
        # looked up for it.
        self.__startTagCache = {}
        self.__endTagCache = {}

    def processEOF(self):
        raise NotImplementedError

    def processComment(self, token):
        # For most phases the following is correct. Where it's not it will be
        # overridden.
        self.tree.insertComment(token, self.tree.openElements[-1])

    def processDoctype(self, token):
        self.parser.parseError("unexpected-doctype")

    def processCharacters(self, token):
        self.tree.insertText(token["data"])

    def processSpaceCharacters(self, token):
        self.tree.insertText(token["data"])

    def processStartTag(self, token):
        """Dispatch ``token`` to the start tag handler for its name."""
        # Note the caching is done here rather than BoundMethodDispatcher as doing it there
        # requires a circular reference to the Phase, and this ends up with a significant
        # (CPython 2.7, 3.8) GC cost when parsing many short inputs
        name = token["name"]
        cache = self.__startTagCache
        # In Py2, using `in` is quicker in general than try/except KeyError
        # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
        if name in cache:
            handler = cache[name]
        else:
            handler = cache[name] = self.startTagHandler[name]
            # bound the cache size in case we get loads of unknown tags
            while len(cache) > len(self.startTagHandler) * 1.1:
                # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
                cache.pop(next(iter(cache)))
        return handler(token)

    def startTagHtml(self, token):
        """Copy attributes of an ``html`` start tag onto the root element,
        keeping any value the root already has."""
        if not self.parser.firstStartTag and token["name"] == "html":
            self.parser.parseError("non-html-root")
        # XXX Need a check here to see if the first start tag token emitted is
        # this token... If it's not, invoke self.parser.parseError().
        root = self.tree.openElements[0]
        for attr, value in token["data"].items():
            if attr not in root.attributes:
                root.attributes[attr] = value
        self.parser.firstStartTag = False

    def processEndTag(self, token):
        """Dispatch ``token`` to the end tag handler for its name."""
        # Note the caching is done here rather than BoundMethodDispatcher as doing it there
        # requires a circular reference to the Phase, and this ends up with a significant
        # (CPython 2.7, 3.8) GC cost when parsing many short inputs
        name = token["name"]
        cache = self.__endTagCache
        # In Py2, using `in` is quicker in general than try/except KeyError
        # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
        if name in cache:
            handler = cache[name]
        else:
            handler = cache[name] = self.endTagHandler[name]
            # bound the cache size in case we get loads of unknown tags
            while len(cache) > len(self.endTagHandler) * 1.1:
                # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
                cache.pop(next(iter(cache)))
        return handler(token)

497

class InitialPhase(Phase):
    """Phase the parser starts in for whole documents: handles the doctype
    and picks the compatibility mode."""
    __slots__ = tuple()

    def processSpaceCharacters(self, token):
        pass

    def processComment(self, token):
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        """Insert the doctype, set ``parser.compatMode`` from its public and
        system identifiers, and move on to the "beforeHtml" phase."""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]
        correct = token["correct"]

        unexpectedSystemId = (systemId is not None and
                              systemId != "about:legacy-compat")
        if name != "html" or publicId is not None or unexpectedSystemId:
            self.parser.parseError("unknown-doctype")

        if publicId is None:
            publicId = ""

        self.tree.insertDoctype(token)

        # Public identifiers are compared ASCII-case-insensitively.
        if publicId != "":
            publicId = publicId.translate(asciiUpper2Lower)

        quirksPrefixes = (
            "+//silmaril//dtd html pro v0r11 19970101//",
            "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
            "-//as//dtd html 3.0 aswedit + extensions//",
            "-//ietf//dtd html 2.0 level 1//",
            "-//ietf//dtd html 2.0 level 2//",
            "-//ietf//dtd html 2.0 strict level 1//",
            "-//ietf//dtd html 2.0 strict level 2//",
            "-//ietf//dtd html 2.0 strict//",
            "-//ietf//dtd html 2.0//",
            "-//ietf//dtd html 2.1e//",
            "-//ietf//dtd html 3.0//",
            "-//ietf//dtd html 3.2 final//",
            "-//ietf//dtd html 3.2//",
            "-//ietf//dtd html 3//",
            "-//ietf//dtd html level 0//",
            "-//ietf//dtd html level 1//",
            "-//ietf//dtd html level 2//",
            "-//ietf//dtd html level 3//",
            "-//ietf//dtd html strict level 0//",
            "-//ietf//dtd html strict level 1//",
            "-//ietf//dtd html strict level 2//",
            "-//ietf//dtd html strict level 3//",
            "-//ietf//dtd html strict//",
            "-//ietf//dtd html//",
            "-//metrius//dtd metrius presentational//",
            "-//microsoft//dtd internet explorer 2.0 html strict//",
            "-//microsoft//dtd internet explorer 2.0 html//",
            "-//microsoft//dtd internet explorer 2.0 tables//",
            "-//microsoft//dtd internet explorer 3.0 html strict//",
            "-//microsoft//dtd internet explorer 3.0 html//",
            "-//microsoft//dtd internet explorer 3.0 tables//",
            "-//netscape comm. corp.//dtd html//",
            "-//netscape comm. corp.//dtd strict html//",
            "-//o'reilly and associates//dtd html 2.0//",
            "-//o'reilly and associates//dtd html extended 1.0//",
            "-//o'reilly and associates//dtd html extended relaxed 1.0//",
            "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
            "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
            "-//spyglass//dtd html 2.0 extended//",
            "-//sq//dtd html 2.0 hotmetal + extensions//",
            "-//sun microsystems corp.//dtd hotjava html//",
            "-//sun microsystems corp.//dtd hotjava strict html//",
            "-//w3c//dtd html 3 1995-03-24//",
            "-//w3c//dtd html 3.2 draft//",
            "-//w3c//dtd html 3.2 final//",
            "-//w3c//dtd html 3.2//",
            "-//w3c//dtd html 3.2s draft//",
            "-//w3c//dtd html 4.0 frameset//",
            "-//w3c//dtd html 4.0 transitional//",
            "-//w3c//dtd html experimental 19960712//",
            "-//w3c//dtd html experimental 970421//",
            "-//w3c//dtd w3 html//",
            "-//w3o//dtd w3 html 3.0//",
            "-//webtechs//dtd mozilla html 2.0//",
            "-//webtechs//dtd mozilla html//")
        quirksPublicIds = ("-//w3o//dtd w3 html strict 3.0//en//",
                           "-/w3c/dtd html 4.0 transitional/en",
                           "html")
        # HTML 4.01 frameset/transitional: quirks without a system
        # identifier, limited quirks with one.
        isHtml401Loose = publicId.startswith(
            ("-//w3c//dtd html 4.01 frameset//",
             "-//w3c//dtd html 4.01 transitional//"))
        isXhtml10Loose = publicId.startswith(
            ("-//w3c//dtd xhtml 1.0 frameset//",
             "-//w3c//dtd xhtml 1.0 transitional//"))

        if (not correct or token["name"] != "html" or
                publicId.startswith(quirksPrefixes) or
                publicId in quirksPublicIds or
                isHtml401Loose and systemId is None or
                systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
            self.parser.compatMode = "quirks"
        elif (isXhtml10Loose or
                isHtml401Loose and systemId is not None):
            self.parser.compatMode = "limited quirks"

        self.parser.phase = self.parser.phases["beforeHtml"]

    def anythingElse(self):
        # No doctype was seen: quirks mode, then continue with "beforeHtml".
        self.parser.compatMode = "quirks"
        self.parser.phase = self.parser.phases["beforeHtml"]

    # Each handler below reports the missing doctype, falls back to
    # anythingElse() and returns the token so it is processed again.

    def processCharacters(self, token):
        self.parser.parseError("expected-doctype-but-got-chars")
        self.anythingElse()
        return token

    def processStartTag(self, token):
        self.parser.parseError("expected-doctype-but-got-start-tag",
                               {"name": token["name"]})
        self.anythingElse()
        return token

    def processEndTag(self, token):
        self.parser.parseError("expected-doctype-but-got-end-tag",
                               {"name": token["name"]})
        self.anythingElse()
        return token

    def processEOF(self):
        self.parser.parseError("expected-doctype-but-got-eof")
        self.anythingElse()
        return True

627

class BeforeHtmlPhase(Phase):
    """Phase that creates the root ``html`` element."""
    __slots__ = tuple()

    # helper methods
    def insertHtmlElement(self):
        """Insert an implied ``html`` root and switch to "beforeHead"."""
        self.tree.insertRoot(impliedTagToken("html", "StartTag"))
        self.parser.phase = self.parser.phases["beforeHead"]

    # other
    def processEOF(self):
        self.insertHtmlElement()
        return True

    def processComment(self, token):
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        pass

    def processCharacters(self, token):
        self.insertHtmlElement()
        return token

    def processStartTag(self, token):
        if token["name"] == "html":
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        return token

    def processEndTag(self, token):
        # Only these end tags imply the root element; the token is then
        # returned for reprocessing. Anything else is reported and dropped.
        if token["name"] in ("head", "body", "html", "br"):
            self.insertHtmlElement()
            return token
        self.parser.parseError("unexpected-end-tag-before-html",
                               {"name": token["name"]})

664

class BeforeHeadPhase(Phase):
    """Phase that waits for, or implies, the ``head`` element."""
    __slots__ = tuple()

    def processEOF(self):
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return True

    def processSpaceCharacters(self, token):
        pass

    def processCharacters(self, token):
        # Imply <head>, then have the characters processed again.
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return token

    def startTagHtml(self, token):
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        """Insert ``head``, remember it as the tree's head pointer and
        switch to "inHead"."""
        self.tree.insertElement(token)
        self.tree.headPointer = self.tree.openElements[-1]
        self.parser.phase = self.parser.phases["inHead"]

    def startTagOther(self, token):
        # Imply <head>, then have the start tag processed again.
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return token

    def endTagImplyHead(self, token):
        # Imply <head>, then have the end tag processed again.
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return token

    def endTagOther(self, token):
        self.parser.parseError("end-tag-after-implied-root",
                               {"name": token["name"]})

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("head", startTagHead)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("head", "body", "html", "br"), endTagImplyHead)
    ])
    endTagHandler.default = endTagOther

709

class InHeadPhase(Phase):
    """Phase that handles the contents of the ``head`` element."""
    __slots__ = tuple()

    # the real thing
    def processEOF(self):
        self.anythingElse()
        return True

    def processCharacters(self, token):
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        self.parser.parseError("two-heads-are-not-better-than-one")

    def startTagBaseLinkCommand(self, token):
        # The element is inserted and popped straight away, so it never
        # stays on the stack of open elements.
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMeta(self, token):
        """Insert a ``meta`` element and, while the stream's encoding is
        still tentative, ask the stream to change encoding based on the
        element's ``charset`` or ``http-equiv``/``content`` attributes."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

        attributes = token["data"]
        stream = self.parser.tokenizer.stream
        if stream.charEncoding[1] != "tentative":
            return

        if "charset" in attributes:
            stream.changeEncoding(attributes["charset"])
            return

        if ("content" in attributes and
                "http-equiv" in attributes and
                attributes["http-equiv"].lower() == "content-type"):
            # Encoding it as UTF-8 here is a hack, as really we should pass
            # the abstract Unicode string, and just use the
            # ContentAttrParser on that, but using UTF-8 allows all chars
            # to be encoded and as a ASCII-superset works.
            data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
            contentParser = _inputstream.ContentAttrParser(data)
            codec = contentParser.parse()
            stream.changeEncoding(codec)

    def startTagTitle(self, token):
        self.parser.parseRCDataRawtext(token, "RCDATA")

    def startTagNoFramesStyle(self, token):
        # Need to decide whether to implement the scripting-disabled case
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagNoscript(self, token):
        if self.parser.scripting:
            self.parser.parseRCDataRawtext(token, "RAWTEXT")
            return
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inHeadNoscript"]

    def startTagScript(self, token):
        self.tree.insertElement(token)
        parser = self.parser
        parser.tokenizer.state = parser.tokenizer.scriptDataState
        # Remember the current phase before switching to "text".
        parser.originalPhase = parser.phase
        parser.phase = parser.phases["text"]

    def startTagOther(self, token):
        self.anythingElse()
        return token

    def endTagHead(self, token):
        node = self.parser.tree.openElements.pop()
        assert node.name == "head", "Expected head got %s" % node.name
        self.parser.phase = self.parser.phases["afterHead"]

    def endTagHtmlBodyBr(self, token):
        self.anythingElse()
        return token

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        # Act as if </head> had been seen.
        self.endTagHead(impliedTagToken("head"))

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("title", startTagTitle),
        (("noframes", "style"), startTagNoFramesStyle),
        ("noscript", startTagNoscript),
        ("script", startTagScript),
        (("base", "basefont", "bgsound", "command", "link"),
         startTagBaseLinkCommand),
        ("meta", startTagMeta),
        ("head", startTagHead)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("head", endTagHead),
        (("br", "html", "body"), endTagHtmlBodyBr)
    ])
    endTagHandler.default = endTagOther

811

class InHeadNoscriptPhase(Phase):
    """Phase that handles the contents of a ``noscript`` element in
    ``head`` (entered from "inHead" when scripting is off)."""
    __slots__ = tuple()

    def processEOF(self):
        self.parser.parseError("eof-in-head-noscript")
        self.anythingElse()
        return True

    def processComment(self, token):
        inHead = self.parser.phases["inHead"]
        return inHead.processComment(token)

    def processCharacters(self, token):
        self.parser.parseError("char-in-head-noscript")
        self.anythingElse()
        return token

    def processSpaceCharacters(self, token):
        inHead = self.parser.phases["inHead"]
        return inHead.processSpaceCharacters(token)

    def startTagHtml(self, token):
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagBaseLinkCommand(self, token):
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagHeadNoscript(self, token):
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

    def startTagOther(self, token):
        self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
        self.anythingElse()
        return token

    def endTagNoscript(self, token):
        node = self.parser.tree.openElements.pop()
        assert node.name == "noscript", "Expected noscript got %s" % node.name
        self.parser.phase = self.parser.phases["inHead"]

    def endTagBr(self, token):
        self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
        self.anythingElse()
        return token

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        # Caller must raise parse error first!
        # Act as if </noscript> had been seen.
        self.endTagNoscript(impliedTagToken("noscript"))

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
        (("head", "noscript"), startTagHeadNoscript),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("noscript", endTagNoscript),
        ("br", endTagBr),
    ])
    endTagHandler.default = endTagOther

874

class AfterHeadPhase(Phase):
    """Phase between the end of ``head`` and the start of ``body`` or
    ``frameset``."""
    __slots__ = tuple()

    def processEOF(self):
        self.anythingElse()
        return True

    def processCharacters(self, token):
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagBody(self, token):
        self.parser.framesetOK = False
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inBody"]

    def startTagFrameset(self, token):
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inFrameset"]

    def startTagFromHead(self, token):
        """Handle a head-only start tag seen after ``head`` was closed."""
        self.parser.parseError("unexpected-start-tag-out-of-my-head",
                               {"name": token["name"]})
        # Put the head element back on the stack while the "inHead" phase
        # processes the tag, then take it off again.
        openElements = self.tree.openElements
        openElements.append(self.tree.headPointer)
        self.parser.phases["inHead"].processStartTag(token)
        for node in self.tree.openElements[::-1]:
            if node.name == "head":
                self.tree.openElements.remove(node)
                break

    def startTagHead(self, token):
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

    def startTagOther(self, token):
        self.anythingElse()
        return token

    def endTagHtmlBodyBr(self, token):
        self.anythingElse()
        return token

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        # Imply <body>; unlike an explicit body tag this leaves framesetOK
        # set to True.
        self.tree.insertElement(impliedTagToken("body", "StartTag"))
        self.parser.phase = self.parser.phases["inBody"]
        self.parser.framesetOK = True

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("body", startTagBody),
        ("frameset", startTagFrameset),
        (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
          "style", "title"),
         startTagFromHead),
        ("head", startTagHead)
    ])
    startTagHandler.default = startTagOther
    endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
                                              endTagHtmlBodyBr)])
    endTagHandler.default = endTagOther

940

class InBodyPhase(Phase):
    """Token handlers for the "in body" phase.

    Start and end tags are routed to the methods below through the
    ``startTagHandler`` and ``endTagHandler`` dispatch tables defined at the
    end of the class.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
    # the really-really-really-very crazy mode
    __slots__ = ("processSpaceCharacters",)

    def __init__(self, *args, **kwargs):
        super(InBodyPhase, self).__init__(*args, **kwargs)
        # The whitespace handler is swapped per instance; start with the
        # ordinary one (no leading-newline dropping).
        self.processSpaceCharacters = self.processSpaceCharactersNonPre

    def isMatchingFormattingElement(self, node1, node2):
        """Tell whether two nodes share name, namespace and attributes."""
        if not node1.name == node2.name:
            return False
        if not node1.namespace == node2.namespace:
            return False
        return node1.attributes == node2.attributes

    # helper
    def addFormattingElement(self, token):
        """Insert the element and record it as an active formatting element.

        If three matching entries already exist after the last Marker, the
        earliest of them is dropped before the new element is appended.
        """
        self.tree.insertElement(token)
        newElement = self.tree.openElements[-1]

        # Collected newest-first, so the last entry is the earliest match.
        duplicates = []
        for entry in self.tree.activeFormattingElements[::-1]:
            if entry is Marker:
                break
            if self.isMatchingFormattingElement(entry, newElement):
                duplicates.append(entry)

        assert len(duplicates) <= 3
        if len(duplicates) == 3:
            self.tree.activeFormattingElements.remove(duplicates[-1])
        self.tree.activeFormattingElements.append(newElement)

    # the real deal
    def processEOF(self):
        """Report a parse error if an element outside the allowed set is still open."""
        mayRemainOpen = frozenset(("dd", "dt", "li", "p", "tbody", "td",
                                   "tfoot", "th", "thead", "tr", "body",
                                   "html"))
        if any(node.name not in mayRemainOpen
               for node in reversed(self.tree.openElements)):
            self.parser.parseError("expected-closing-tag-but-got-eof")
        # Stop parsing

    def processSpaceCharactersDropNewline(self, token):
        """One-shot whitespace handler installed after <pre>, <listing>, <textarea>.

        Restores the ordinary handler, then drops a single leading newline
        when the current node is one of those elements and has no content yet.
        """
        text = token["data"]
        self.processSpaceCharacters = self.processSpaceCharactersNonPre
        if text.startswith("\n"):
            current = self.tree.openElements[-1]
            if (current.name in ("pre", "listing", "textarea") and
                    not self.tree.openElements[-1].hasContent()):
                text = text[1:]
        if not text:
            return
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertText(text)

    def processCharacters(self, token):
        """Insert character data; non-space characters clear framesetOK."""
        if token["data"] == "\u0000":
            # The tokenizer should always emit null on its own
            return
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertText(token["data"])
        # This must be bad for performance
        if not self.parser.framesetOK:
            return
        if any(char not in spaceCharacters for char in token["data"]):
            self.parser.framesetOK = False

    def processSpaceCharactersNonPre(self, token):
        """Default whitespace handler: insert the text as-is."""
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        tree.insertText(token["data"])

    def startTagProcessInHead(self, token):
        """Delegate head-type start tags to the "inHead" phase."""
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagBody(self, token):
        """Merge attributes of a stray <body> into the existing body element."""
        self.parser.parseError("unexpected-start-tag", {"name": "body"})
        openElements = self.tree.openElements
        if len(openElements) == 1 or openElements[1].name != "body":
            assert self.parser.innerHTML
            return
        self.parser.framesetOK = False
        for attrName, attrValue in token["data"].items():
            # Attributes already present on the body element are kept.
            if attrName not in self.tree.openElements[1].attributes:
                self.tree.openElements[1].attributes[attrName] = attrValue

    def startTagFrameset(self, token):
        """Replace the body with a frameset while framesetOK is still set."""
        self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
        if (len(self.tree.openElements) == 1 or
                self.tree.openElements[1].name != "body"):
            assert self.parser.innerHTML
            return
        if not self.parser.framesetOK:
            return
        bodyElement = self.tree.openElements[1]
        if bodyElement.parent:
            self.tree.openElements[1].parent.removeChild(self.tree.openElements[1])
        # Pop everything above the root html element.
        while self.tree.openElements[-1].name != "html":
            self.tree.openElements.pop()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inFrameset"]

    def startTagCloseP(self, token):
        """Close an open <p> in button scope, then insert the element."""
        pIsOpen = self.tree.elementInScope("p", variant="button")
        if pIsOpen:
            self.endTagP(impliedTagToken("p"))
        self.tree.insertElement(token)

    def startTagPreListing(self, token):
        """Insert <pre>/<listing> and arm the leading-newline dropper."""
        pIsOpen = self.tree.elementInScope("p", variant="button")
        if pIsOpen:
            self.endTagP(impliedTagToken("p"))
        self.tree.insertElement(token)
        self.parser.framesetOK = False
        self.processSpaceCharacters = self.processSpaceCharactersDropNewline

    def startTagForm(self, token):
        """Insert a <form> unless a form pointer is already set."""
        if self.tree.formPointer:
            self.parser.parseError("unexpected-start-tag", {"name": "form"})
            return
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        self.tree.insertElement(token)
        self.tree.formPointer = self.tree.openElements[-1]

    def startTagListItem(self, token):
        """Handle <li>, <dd> and <dt>, closing a previous open item first."""
        self.parser.framesetOK = False

        definitionItems = ["dt", "dd"]
        closersByTag = {"li": ["li"],
                        "dt": definitionItems,
                        "dd": definitionItems}
        closers = closersByTag[token["name"]]
        for node in reversed(self.tree.openElements):
            if node.name in closers:
                self.parser.phase.processEndTag(
                    impliedTagToken(node.name, "EndTag"))
                break
            # A special element other than address/div/p stops the search.
            isSpecial = node.nameTuple in specialElements
            if isSpecial and node.name not in ("address", "div", "p"):
                break

        if self.tree.elementInScope("p", variant="button"):
            self.parser.phase.processEndTag(
                impliedTagToken("p", "EndTag"))

        self.tree.insertElement(token)

    def startTagPlaintext(self, token):
        """Insert <plaintext> and put the tokenizer in its plaintext state."""
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.plaintextState

    def startTagHeading(self, token):
        """Insert a heading; a heading that is the current node is popped first."""
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        currentNode = self.tree.openElements[-1]
        if currentNode.name in headingElements:
            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
            self.tree.openElements.pop()
        self.tree.insertElement(token)

    def startTagA(self, token):
        """Handle <a>; an <a> still in the active formatting list is closed first."""
        previousA = self.tree.elementInActiveFormattingElements("a")
        if previousA:
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "a", "endName": "a"})
            self.endTagFormatting(impliedTagToken("a"))
            # Remove any leftovers of the old element from both lists.
            if previousA in self.tree.openElements:
                self.tree.openElements.remove(previousA)
            if previousA in self.tree.activeFormattingElements:
                self.tree.activeFormattingElements.remove(previousA)
        self.tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagFormatting(self, token):
        """Insert a formatting element and track it in the active list."""
        self.tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagNobr(self, token):
        """Handle <nobr>; a <nobr> already in scope is closed first."""
        self.tree.reconstructActiveFormattingElements()
        if self.tree.elementInScope("nobr"):
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "nobr", "endName": "nobr"})
            self.processEndTag(impliedTagToken("nobr"))
            # XXX Need tests that trigger the following
            self.tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagButton(self, token):
        """Handle <button>.

        If a button is already in scope it is closed and the token is handed
        back to the caller; otherwise the element is inserted.
        """
        if not self.tree.elementInScope("button"):
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertElement(token)
            self.parser.framesetOK = False
            return None
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "button", "endName": "button"})
        self.processEndTag(impliedTagToken("button"))
        return token

    def startTagAppletMarqueeObject(self, token):
        """Insert the element and push a Marker onto the active formatting list."""
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        tree.insertElement(token)
        tree.activeFormattingElements.append(Marker)
        self.parser.framesetOK = False

    def startTagXmp(self, token):
        """Handle <xmp> by parsing its content as RAWTEXT."""
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        self.tree.reconstructActiveFormattingElements()
        self.parser.framesetOK = False
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagTable(self, token):
        """Insert <table> and switch to the "inTable" phase.

        Outside quirks mode, an open <p> in button scope is closed first.
        """
        notQuirks = self.parser.compatMode != "quirks"
        if notQuirks and self.tree.elementInScope("p", variant="button"):
            self.processEndTag(impliedTagToken("p"))
        self.tree.insertElement(token)
        self.parser.framesetOK = False
        self.parser.phase = self.parser.phases["inTable"]

    def startTagVoidFormatting(self, token):
        """Insert a void element, pop it straight away and clear framesetOK."""
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        tree.insertElement(token)
        tree.openElements.pop()
        token["selfClosingAcknowledged"] = True
        self.parser.framesetOK = False

    def startTagInput(self, token):
        """Handle <input>; ``type=hidden`` leaves framesetOK as it was."""
        savedFramesetOK = self.parser.framesetOK
        self.startTagVoidFormatting(token)
        attrs = token["data"]
        if "type" not in attrs:
            return
        if token["data"]["type"].translate(asciiUpper2Lower) == "hidden":
            # input type=hidden doesn't change framesetOK
            self.parser.framesetOK = savedFramesetOK

    def startTagParamSource(self, token):
        """Insert a void element and pop it straight away."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagHr(self, token):
        """Handle <hr>: close an open <p>, insert and pop, clear framesetOK."""
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True
        self.parser.framesetOK = False

    def startTagImage(self, token):
        """Treat <image> as <img>, carrying over attributes and self-closing flag."""
        # No really...
        self.parser.parseError("unexpected-start-tag-treated-as",
                               {"originalName": "image", "newName": "img"})
        imgToken = impliedTagToken("img", "StartTag",
                                   attributes=token["data"],
                                   selfClosing=token["selfClosing"])
        self.processStartTag(imgToken)

    def startTagIsIndex(self, token):
        """Expand the deprecated <isindex> into form/hr/label/input/hr markup.

        Nothing is generated when a form pointer is already set.
        """
        self.parser.parseError("deprecated-tag", {"name": "isindex"})
        if self.tree.formPointer:
            return
        if "action" in token["data"]:
            formAttrs = {"action": token["data"]["action"]}
        else:
            formAttrs = {}
        self.processStartTag(impliedTagToken("form", "StartTag",
                                             attributes=formAttrs))
        self.processStartTag(impliedTagToken("hr", "StartTag"))
        self.processStartTag(impliedTagToken("label", "StartTag"))
        # XXX Localization ...
        if "prompt" in token["data"]:
            promptText = token["data"]["prompt"]
        else:
            promptText = "This is a searchable index. Enter search keywords: "
        self.processCharacters(
            {"type": tokenTypes["Characters"], "data": promptText})
        # The generated <input> gets every attribute except action/prompt,
        # with its name forced to "isindex".
        inputAttrs = token["data"].copy()
        for consumed in ("action", "prompt"):
            if consumed in inputAttrs:
                del inputAttrs[consumed]
        inputAttrs["name"] = "isindex"
        self.processStartTag(impliedTagToken("input", "StartTag",
                                             attributes=inputAttrs,
                                             selfClosing=token["selfClosing"]))
        self.processEndTag(impliedTagToken("label"))
        self.processStartTag(impliedTagToken("hr", "StartTag"))
        self.processEndTag(impliedTagToken("form"))

    def startTagTextarea(self, token):
        """Insert <textarea>, switch the tokenizer to RCDATA, arm newline dropping."""
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.rcdataState
        self.processSpaceCharacters = self.processSpaceCharactersDropNewline
        self.parser.framesetOK = False

    def startTagIFrame(self, token):
        """Handle <iframe>: clear framesetOK and parse the content as RAWTEXT."""
        self.parser.framesetOK = False
        self.startTagRawtext(token)

    def startTagNoscript(self, token):
        """Handle <noscript>: RAWTEXT when scripting is on, ordinary element otherwise."""
        handler = self.startTagRawtext if self.parser.scripting else self.startTagOther
        handler(token)

    def startTagRawtext(self, token):
        """Parse the element's content as RAWTEXT (iframe, noembed, noframes,
        and noscript when scripting is enabled)."""
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagOpt(self, token):
        """Handle <option>/<optgroup>; an <option> that is the current node is closed first."""
        currentNode = self.tree.openElements[-1]
        if currentNode.name == "option":
            self.parser.phase.processEndTag(impliedTagToken("option"))
        self.tree.reconstructActiveFormattingElements()
        self.parser.tree.insertElement(token)

    def startTagSelect(self, token):
        """Insert <select> and pick the "inSelect" or "inSelectInTable" phase."""
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        self.parser.framesetOK = False
        phases = self.parser.phases
        tableRelated = (phases["inTable"],
                        phases["inCaption"],
                        phases["inColumnGroup"],
                        phases["inTableBody"],
                        phases["inRow"],
                        phases["inCell"])
        if self.parser.phase in tableRelated:
            self.parser.phase = self.parser.phases["inSelectInTable"]
        else:
            self.parser.phase = self.parser.phases["inSelect"]

    def startTagRpRt(self, token):
        """Handle <rp>/<rt>; inside a <ruby> in scope, implied end tags are generated first."""
        if self.tree.elementInScope("ruby"):
            self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != "ruby":
                self.parser.parseError()
        self.tree.insertElement(token)

    def startTagMath(self, token):
        """Insert a <math> element in the MathML namespace."""
        self.tree.reconstructActiveFormattingElements()
        self.parser.adjustMathMLAttributes(token)
        self.parser.adjustForeignAttributes(token)
        token["namespace"] = namespaces["mathml"]
        self.tree.insertElement(token)
        # Need to get the parse error right for the case where the token
        # has a namespace not equal to the xmlns attribute
        if not token["selfClosing"]:
            return
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagSvg(self, token):
        """Insert an <svg> element in the SVG namespace."""
        self.tree.reconstructActiveFormattingElements()
        self.parser.adjustSVGAttributes(token)
        self.parser.adjustForeignAttributes(token)
        token["namespace"] = namespaces["svg"]
        self.tree.insertElement(token)
        # Need to get the parse error right for the case where the token
        # has a namespace not equal to the xmlns attribute
        if not token["selfClosing"]:
            return
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMisplaced(self, token):
        """Report and ignore a start tag that belongs to another insertion mode.

        The dispatch table routes "caption", "col", "colgroup", "frame",
        "head", "tbody", "td", "tfoot", "th", "thead" and "tr" here.
        """
        self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]})

    def startTagOther(self, token):
        """Default start-tag handling: reconstruct formatting, insert the element."""
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        tree.insertElement(token)

    def endTagP(self, token):
        """Close a <p>; if none is in button scope, open one first and retry."""
        if not self.tree.elementInScope("p", variant="button"):
            self.startTagCloseP(impliedTagToken("p", "StartTag"))
            self.parser.parseError("unexpected-end-tag", {"name": "p"})
            self.endTagP(impliedTagToken("p", "EndTag"))
            return
        self.tree.generateImpliedEndTags("p")
        if self.tree.openElements[-1].name != "p":
            self.parser.parseError("unexpected-end-tag", {"name": "p"})
        # Pop up to and including the <p>.
        while self.tree.openElements.pop().name != "p":
            pass

    def endTagBody(self, token):
        """Handle </body>: switch to "afterBody" when a body is in scope."""
        if not self.tree.elementInScope("body"):
            self.parser.parseError()
            return
        if self.tree.openElements[-1].name != "body":
            mayRemainOpen = frozenset(("dd", "dt", "li", "optgroup",
                                       "option", "p", "rp", "rt",
                                       "tbody", "td", "tfoot",
                                       "th", "thead", "tr", "body",
                                       "html"))
            for node in self.tree.openElements[2:]:
                if node.name not in mayRemainOpen:
                    # Not sure this is the correct name for the parse error
                    self.parser.parseError(
                        "expected-one-end-tag-but-got-another",
                        {"gotName": "body", "expectedName": node.name})
                    break
        self.parser.phase = self.parser.phases["afterBody"]

    def endTagHtml(self, token):
        """Handle </html> as an implied </body>, then hand the token back."""
        # We repeat the test for the body end tag token being ignored here
        if not self.tree.elementInScope("body"):
            return None
        self.endTagBody(impliedTagToken("body"))
        return token

    def endTagBlock(self, token):
        """Close a block-level element named by the end tag."""
        tagName = token["name"]
        # Put us back in the right whitespace handling mode
        if tagName == "pre":
            self.processSpaceCharacters = self.processSpaceCharactersNonPre
        inScope = self.tree.elementInScope(token["name"])
        if inScope:
            self.tree.generateImpliedEndTags()
        # The mismatch is reported whether or not the element is in scope.
        if self.tree.openElements[-1].name != token["name"]:
            self.parser.parseError("end-tag-too-early", {"name": token["name"]})
        if inScope:
            while self.tree.openElements.pop().name != token["name"]:
                pass

    def endTagForm(self, token):
        """Handle </form>: clear the form pointer and remove that node from the stack."""
        formNode = self.tree.formPointer
        self.tree.formPointer = None
        if formNode is None or not self.tree.elementInScope(formNode):
            self.parser.parseError("unexpected-end-tag",
                                   {"name": "form"})
            return
        self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1] != formNode:
            self.parser.parseError("end-tag-too-early-ignored",
                                   {"name": "form"})
        self.tree.openElements.remove(formNode)

    def endTagListItem(self, token):
        """Handle </li>, </dd> and </dt> (``li`` is checked in "list" scope)."""
        scopeVariant = "list" if token["name"] == "li" else None
        if not self.tree.elementInScope(token["name"], variant=scopeVariant):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
            return
        self.tree.generateImpliedEndTags(exclude=token["name"])
        if self.tree.openElements[-1].name != token["name"]:
            self.parser.parseError(
                "end-tag-too-early",
                {"name": token["name"]})
        while self.tree.openElements.pop().name != token["name"]:
            pass

    def endTagHeading(self, token):
        """Handle a heading end tag; any heading in scope is closed."""
        if any(self.tree.elementInScope(heading) for heading in headingElements):
            self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1].name != token["name"]:
            self.parser.parseError("end-tag-too-early", {"name": token["name"]})

        if any(self.tree.elementInScope(heading) for heading in headingElements):
            # Pop up to and including the nearest heading element.
            while self.tree.openElements.pop().name not in headingElements:
                pass

    def endTagFormatting(self, token):
        """The much-feared adoption agency algorithm"""
        # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
        # XXX Better parseError messages appreciated.
        tree = self.tree

        # Steps 1-3: the outer loop runs at most eight times.
        for _outerPass in range(8):

            # Step 4: locate the formatting element, i.e. the entry returned
            # by elementInActiveFormattingElements for the token's tag name.
            formattingElement = tree.elementInActiveFormattingElements(
                token["name"])
            if (not formattingElement or
                    (formattingElement in tree.openElements and
                     not tree.elementInScope(formattingElement.name))):
                # No usable formatting element: treat the token like any
                # other end tag.
                self.endTagOther(token)
                return

            elif formattingElement not in tree.openElements:
                # In the active list but no longer open: parse error, drop
                # the entry and stop.
                self.parser.parseError("adoption-agency-1.2", {"name": token["name"]})
                tree.activeFormattingElements.remove(formattingElement)
                return

            elif not tree.elementInScope(formattingElement.name):
                # Open but not in scope: parse error, ignore the token.
                self.parser.parseError("adoption-agency-4.4", {"name": token["name"]})
                return

            else:
                # Open and in scope; it is a parse error if it is not the
                # current node, but the algorithm continues either way.
                if formattingElement != tree.openElements[-1]:
                    self.parser.parseError("adoption-agency-1.3", {"name": token["name"]})

            # Step 5: the furthest block is the first special element found
            # at or after the formatting element's stack position, if any.
            afeIndex = tree.openElements.index(formattingElement)
            furthestBlock = None
            for candidate in tree.openElements[afeIndex:]:
                if candidate.nameTuple in specialElements:
                    furthestBlock = candidate
                    break

            # Step 6: without a furthest block, pop up to and including the
            # formatting element, drop it from the active list and stop.
            if furthestBlock is None:
                popped = tree.openElements.pop()
                while popped != formattingElement:
                    popped = tree.openElements.pop()
                tree.activeFormattingElements.remove(popped)
                return

            # Step 7
            commonAncestor = tree.openElements[afeIndex - 1]

            # Step 8: the bookmark records where the replacement entry goes
            # in the active formatting list (step 14). It can move in
            # step 9.7.
            bookmark = tree.activeFormattingElements.index(formattingElement)

            # Step 9: the inner loop runs at most three times.
            lastNode = node = furthestBlock
            index = tree.openElements.index(node)
            for _innerPass in range(3):
                # Node is element before node in open elements
                index -= 1
                node = tree.openElements[index]
                if node not in tree.activeFormattingElements:
                    tree.openElements.remove(node)
                    continue
                # Step 9.6
                if node == formattingElement:
                    break
                # Step 9.7
                if lastNode == furthestBlock:
                    bookmark = tree.activeFormattingElements.index(node) + 1
                # Step 9.8: swap node for a clone in both lists.
                clone = node.cloneNode()
                tree.activeFormattingElements[
                    tree.activeFormattingElements.index(node)] = clone
                tree.openElements[
                    tree.openElements.index(node)] = clone
                node = clone
                # Step 9.9: detach lastNode from its parent, if any, and
                # re-attach it under node.
                if lastNode.parent:
                    lastNode.parent.removeChild(lastNode)
                node.appendChild(lastNode)
                # Step 9.10
                lastNode = node

            # Step 10: re-home lastNode under the common ancestor, using the
            # table-misnesting position when the ancestor is table-like.
            if lastNode.parent:
                lastNode.parent.removeChild(lastNode)

            if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
                parent, insertBefore = tree.getTableMisnestedNodePosition()
                parent.insertBefore(lastNode, insertBefore)
            else:
                commonAncestor.appendChild(lastNode)

            # Step 11
            clone = formattingElement.cloneNode()

            # Step 12
            furthestBlock.reparentChildren(clone)

            # Step 13
            furthestBlock.appendChild(clone)

            # Step 14
            tree.activeFormattingElements.remove(formattingElement)
            tree.activeFormattingElements.insert(bookmark, clone)

            # Step 15
            tree.openElements.remove(formattingElement)
            tree.openElements.insert(
                tree.openElements.index(furthestBlock) + 1, clone)

    def endTagAppletMarqueeObject(self, token):
        """Close <applet>/<marquee>/<object> and clear the active formatting list."""
        if self.tree.elementInScope(token["name"]):
            self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1].name != token["name"]:
            self.parser.parseError("end-tag-too-early", {"name": token["name"]})

        if self.tree.elementInScope(token["name"]):
            while self.tree.openElements.pop().name != token["name"]:
                pass
            self.tree.clearActiveFormattingElements()

    def endTagBr(self, token):
        """Treat </br> as a <br> element: insert it and pop it straight away."""
        self.parser.parseError("unexpected-end-tag-treated-as",
                               {"originalName": "br", "newName": "br element"})
        tree = self.tree
        tree.reconstructActiveFormattingElements()
        tree.insertElement(impliedTagToken("br", "StartTag"))
        tree.openElements.pop()

    def endTagOther(self, token):
        """Default end-tag handling.

        Searches the stack from the top for an element with the token's
        name and pops through it; a special element met first ends the
        search with a parse error.
        """
        for node in reversed(self.tree.openElements):
            if node.name != token["name"]:
                if node.nameTuple in specialElements:
                    self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
                    break
                continue
            self.tree.generateImpliedEndTags(exclude=token["name"])
            if self.tree.openElements[-1].name != token["name"]:
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
            while self.tree.openElements.pop() != node:
                pass
            break

    # Dispatch tables: tag name -> handler defined above.
    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("base", "basefont", "bgsound", "command", "link", "meta",
          "script", "style", "title"),
         startTagProcessInHead),
        ("body", startTagBody),
        ("frameset", startTagFrameset),
        (("address", "article", "aside", "blockquote", "center", "details",
          "dir", "div", "dl", "fieldset", "figcaption", "figure",
          "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
          "section", "summary", "ul"),
         startTagCloseP),
        (headingElements, startTagHeading),
        (("pre", "listing"), startTagPreListing),
        ("form", startTagForm),
        (("li", "dd", "dt"), startTagListItem),
        ("plaintext", startTagPlaintext),
        ("a", startTagA),
        (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
          "strong", "tt", "u"), startTagFormatting),
        ("nobr", startTagNobr),
        ("button", startTagButton),
        (("applet", "marquee", "object"), startTagAppletMarqueeObject),
        ("xmp", startTagXmp),
        ("table", startTagTable),
        (("area", "br", "embed", "img", "keygen", "wbr"),
         startTagVoidFormatting),
        (("param", "source", "track"), startTagParamSource),
        ("input", startTagInput),
        ("hr", startTagHr),
        ("image", startTagImage),
        ("isindex", startTagIsIndex),
        ("textarea", startTagTextarea),
        ("iframe", startTagIFrame),
        ("noscript", startTagNoscript),
        (("noembed", "noframes"), startTagRawtext),
        ("select", startTagSelect),
        (("rp", "rt"), startTagRpRt),
        (("option", "optgroup"), startTagOpt),
        (("math"), startTagMath),
        (("svg"), startTagSvg),
        (("caption", "col", "colgroup", "frame", "head",
          "tbody", "td", "tfoot", "th", "thead",
          "tr"), startTagMisplaced)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("body", endTagBody),
        ("html", endTagHtml),
        (("address", "article", "aside", "blockquote", "button", "center",
          "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
          "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
          "section", "summary", "ul"), endTagBlock),
        ("form", endTagForm),
        ("p", endTagP),
        (("dd", "dt", "li"), endTagListItem),
        (headingElements, endTagHeading),
        (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
          "strike", "strong", "tt", "u"), endTagFormatting),
        (("applet", "marquee", "object"), endTagAppletMarqueeObject),
        ("br", endTagBr),
    ])
    endTagHandler.default = endTagOther

1664

class TextPhase(Phase):
    """Token handlers used while an element's text content is being read.

    Every exit path restores ``parser.phase`` from ``parser.originalPhase``.
    """
    __slots__ = tuple()

    def processCharacters(self, token):
        """Append the character data to the tree."""
        text = token["data"]
        self.tree.insertText(text)

    def processEOF(self):
        """Report the unclosed element, pop it, restore the phase, return True."""
        unclosed = self.tree.openElements[-1]
        self.parser.parseError("expected-named-closing-tag-but-got-eof",
                               {"name": unclosed.name})
        self.tree.openElements.pop()
        self.parser.phase = self.parser.originalPhase
        return True

    def startTagOther(self, token):
        """No start tag is expected in this phase; reaching here is a bug."""
        assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']

    def endTagScript(self, token):
        """Pop the current node (asserted to be a script) and restore the phase."""
        closed = self.tree.openElements.pop()
        assert closed.name == "script"
        self.parser.phase = self.parser.originalPhase
        # The rest of this method is all stuff that only happens if
        # document.write works

    def endTagOther(self, token):
        """Pop the current node and restore the previous phase."""
        self.tree.openElements.pop()
        parser = self.parser
        parser.phase = parser.originalPhase

    # Dispatch tables: only </script> gets a dedicated handler.
    startTagHandler = _utils.MethodDispatcher([])
    startTagHandler.default = startTagOther
    endTagHandler = _utils.MethodDispatcher([
        ("script", endTagScript)
    ])
    endTagHandler.default = endTagOther

1697

1698 class InTablePhase(Phase):

1699 # http://www.whatwg.org/specs/web-apps/current-work/#in-table

1700 __slots__ = tuple()

1701

1702 # helper methods

1703 def clearStackToTableContext(self):

1704 # "clear the stack back to a table context"

1705 while self.tree.openElements[-1].name not in ("table", "html"):

1706 # self.parser.parseError("unexpected-implied-end-tag-in-table",

1707 # {"name": self.tree.openElements[-1].name})

1708 self.tree.openElements.pop()

1709 # When the current node is <html> it's an innerHTML case

1710

1711 # processing methods

1712 def processEOF(self):

1713 if self.tree.openElements[-1].name != "html":

1714 self.parser.parseError("eof-in-table")

1715 else:

1716 assert self.parser.innerHTML

1717 # Stop parsing

1718

1719 def processSpaceCharacters(self, token):

1720 originalPhase = self.parser.phase

1721 self.parser.phase = self.parser.phases["inTableText"]

1722 self.parser.phase.originalPhase = originalPhase

1723 self.parser.phase.processSpaceCharacters(token)

1724

1725 def processCharacters(self, token):

1726 originalPhase = self.parser.phase

1727 self.parser.phase = self.parser.phases["inTableText"]

1728 self.parser.phase.originalPhase = originalPhase

1729 self.parser.phase.processCharacters(token)

1730

1731 def insertText(self, token):

1732 # If we get here there must be at least one non-whitespace character

1733 # Do the table magic!

1734 self.tree.insertFromTable = True

1735 self.parser.phases["inBody"].processCharacters(token)

1736 self.tree.insertFromTable = False

1737

1738 def startTagCaption(self, token):

1739 self.clearStackToTableContext()

1740 self.tree.activeFormattingElements.append(Marker)

1741 self.tree.insertElement(token)

1742 self.parser.phase = self.parser.phases["inCaption"]

1743

1744 def startTagColgroup(self, token):

1745 self.clearStackToTableContext()

1746 self.tree.insertElement(token)

1747 self.parser.phase = self.parser.phases["inColumnGroup"]

1748

1749 def startTagCol(self, token):

1750 self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))

1751 return token

1752

1753 def startTagRowGroup(self, token):

1754 self.clearStackToTableContext()

1755 self.tree.insertElement(token)

1756 self.parser.phase = self.parser.phases["inTableBody"]

1757

def startTagImplyTbody(self, token):
    """Handle a start tag that needs an implied <tbody> opened first.

    The original token is returned so that it gets processed again.
    """
    impliedTbody = impliedTagToken("tbody", "StartTag")
    self.startTagRowGroup(impliedTbody)
    return token

1761

def startTagTable(self, token):
    """Handle a nested <table> start tag.

    Reports a parse error and feeds an implied </table> to the current
    phase. Outside innerHTML mode the token is returned; in innerHTML mode
    nothing is returned.
    """
    errorData = {"startName": "table", "endName": "table"}
    self.parser.parseError("unexpected-start-tag-implies-end-tag", errorData)
    self.parser.phase.processEndTag(impliedTagToken("table"))
    if self.parser.innerHTML:
        return None
    return token

1768

def startTagStyleScript(self, token):
    """Delegate the start tag to the "inHead" phase and return its result."""
    headPhase = self.parser.phases["inHead"]
    return headPhase.processStartTag(token)

1771

def startTagInput(self, token):
    """Handle <input>.

    An input whose ``type`` attribute equals "hidden" (compared after
    ASCII lower-casing) is reported as a parse error, inserted and
    immediately popped. Any other input goes through ``startTagOther``.
    """
    attributes = token["data"]
    isHidden = ("type" in attributes and
                attributes["type"].translate(asciiUpper2Lower) == "hidden")
    if not isHidden:
        self.startTagOther(token)
        return
    self.parser.parseError("unexpected-hidden-input-in-table")
    self.tree.insertElement(token)
    # XXX associate with form
    self.tree.openElements.pop()

1781

def startTagForm(self, token):
    """Handle <form>.

    Always reports "unexpected-form-in-table". If no form pointer is set,
    the element is inserted, recorded as the form pointer and popped from
    the stack of open elements again.
    """
    self.parser.parseError("unexpected-form-in-table")
    tree = self.tree
    if tree.formPointer is not None:
        return
    tree.insertElement(token)
    # The freshly inserted element sits on top of the stack; remember it
    # and remove it in one step.
    tree.formPointer = tree.openElements.pop()

1788

def startTagOther(self, token):
    """Handle any start tag without a dedicated handler.

    Reports "unexpected-start-tag-implies-table-voodoo" and processes the
    token through the "inBody" phase with ``tree.insertFromTable`` switched
    on. Returns None.

    :arg token: the start tag token

    The flag is always cleared again, even when the in-body handler
    raises, so the tree builder cannot be left with the flag set.
    """
    self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
    # Do the table magic!
    self.tree.insertFromTable = True
    try:
        self.parser.phases["inBody"].processStartTag(token)
    finally:
        self.tree.insertFromTable = False

1795

def endTagTable(self, token):
    """Handle </table>.

    With a table in table scope: generate implied end tags, report
    "end-tag-too-early-named" if the current node is not the table, pop
    everything up to and including the table and reset the insertion mode.
    Otherwise (innerHTML case) only a parse error is reported.
    """
    if not self.tree.elementInScope("table", variant="table"):
        # innerHTML case
        assert self.parser.innerHTML
        self.parser.parseError()
        return

    self.tree.generateImpliedEndTags()
    stack = self.tree.openElements
    topName = stack[-1].name
    if topName != "table":
        self.parser.parseError("end-tag-too-early-named",
                               {"gotName": "table",
                                "expectedName": topName})
    while stack[-1].name != "table":
        stack.pop()
    stack.pop()
    self.parser.resetInsertionMode()

1811

def endTagIgnore(self, token):
    """Report "unexpected-end-tag" for the token and do nothing else."""
    errorData = {"name": token["name"]}
    self.parser.parseError("unexpected-end-tag", errorData)

1814

def endTagOther(self, token):
    """Handle any end tag without a dedicated handler.

    Reports "unexpected-end-tag-implies-table-voodoo" and processes the
    token through the "inBody" phase with ``tree.insertFromTable`` switched
    on. Returns None.

    :arg token: the end tag token

    The flag is always cleared again, even when the in-body handler
    raises, so the tree builder cannot be left with the flag set.
    """
    self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
    # Do the table magic!
    self.tree.insertFromTable = True
    try:
        self.parser.phases["inBody"].processEndTag(token)
    finally:
        self.tree.insertFromTable = False

1821

# Start tag dispatch: tag name (or tuple of names) -> handler; anything
# not listed falls through to startTagOther.
startTagHandler = _utils.MethodDispatcher([
    ("html", Phase.startTagHtml),
    ("caption", startTagCaption),
    ("colgroup", startTagColgroup),
    ("col", startTagCol),
    (("tbody", "tfoot", "thead"), startTagRowGroup),
    (("td", "th", "tr"), startTagImplyTbody),
    ("table", startTagTable),
    (("style", "script"), startTagStyleScript),
    ("input", startTagInput),
    ("form", startTagForm),
])
startTagHandler.default = startTagOther

# End tag dispatch: anything not listed falls through to endTagOther.
endTagHandler = _utils.MethodDispatcher([
    ("table", endTagTable),
    (
        ("body", "caption", "col", "colgroup", "html", "tbody", "td",
         "tfoot", "th", "thead", "tr"),
        endTagIgnore,
    ),
])
endTagHandler.default = endTagOther

1842

class InTableTextPhase(Phase):
    """Phase that buffers character tokens and flushes them as one run.

    Character tokens are collected in ``characterTokens``. Any comment,
    start tag, end tag or EOF flushes the buffer, restores
    ``originalPhase`` as the parser's phase and hands the token back.
    """
    __slots__ = ("originalPhase", "characterTokens")

    def __init__(self, *args, **kwargs):
        super(InTableTextPhase, self).__init__(*args, **kwargs)
        # Phase to return to once the buffered text has been flushed.
        self.originalPhase = None
        # Character tokens collected since the last flush.
        self.characterTokens = []

    def flushCharacters(self):
        """Emit the buffered text and empty the buffer.

        Text containing any non-space character goes through the
        "inTable" phase's ``insertText``; non-empty whitespace-only text is
        inserted directly into the tree.
        """
        text = "".join(tok["data"] for tok in self.characterTokens)
        hasNonSpace = any(char not in spaceCharacters for char in text)
        if hasNonSpace:
            self.parser.phases["inTable"].insertText(
                {"type": tokenTypes["Characters"], "data": text})
        elif text:
            self.tree.insertText(text)
        self.characterTokens = []

    def processComment(self, token):
        """Flush, restore the original phase and return the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEOF(self):
        """Flush, restore the original phase and return True."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return True

    def processCharacters(self, token):
        """Buffer the token; a lone U+0000 token is dropped."""
        if token["data"] != "\u0000":
            self.characterTokens.append(token)

    def processSpaceCharacters(self, token):
        """Buffer a whitespace token."""
        # pretty sure we should never reach here
        self.characterTokens.append(token)
        # assert False

    def processStartTag(self, token):
        """Flush, restore the original phase and return the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEndTag(self, token):
        """Flush, restore the original phase and return the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

1889

class InCaptionPhase(Phase):
    """Token handlers for the "in caption" phase."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
    __slots__ = tuple()

    def ignoreEndTagCaption(self):
        """Return True when no caption element is in table scope."""
        captionInScope = self.tree.elementInScope("caption", variant="table")
        return not captionInScope

    def processEOF(self):
        """Delegate EOF to the "inBody" phase (its result is not returned)."""
        self.parser.phases["inBody"].processEOF()

    def processCharacters(self, token):
        """Delegate character tokens to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processCharacters(token)

    def startTagTableElement(self, token):
        """Handle a table-structure start tag by implying </caption>.

        The token is returned for reprocessing unless the implied end tag
        was ignored.
        """
        self.parser.parseError()
        # XXX Have to duplicate logic here to find out if the tag is ignored
        wasIgnored = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        return None if wasIgnored else token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processStartTag(token)

    def endTagCaption(self, token):
        """Close the caption and switch to the "inTable" phase.

        When no caption is in table scope (innerHTML case) only a parse
        error is reported.
        """
        if self.ignoreEndTagCaption():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return

        # AT this code is quite similar to endTagTable in "InTable"
        self.tree.generateImpliedEndTags()
        stack = self.tree.openElements
        topName = stack[-1].name
        if topName != "caption":
            self.parser.parseError("expected-one-end-tag-but-got-another",
                                   {"gotName": "caption",
                                    "expectedName": topName})
        while stack[-1].name != "caption":
            stack.pop()
        stack.pop()
        self.tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Handle </table> by implying </caption>.

        The token is returned for reprocessing unless the implied end tag
        was ignored.
        """
        self.parser.parseError()
        wasIgnored = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        return None if wasIgnored else token

    def endTagIgnore(self, token):
        """Report "unexpected-end-tag" and drop the token."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", errorData)

    def endTagOther(self, token):
        """Delegate any other end tag to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (
            ("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
             "thead", "tr"),
            startTagTableElement,
        ),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("caption", endTagCaption),
        ("table", endTagTable),
        (
            ("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
             "thead", "tr"),
            endTagIgnore,
        ),
    ])
    endTagHandler.default = endTagOther

1959

class InColumnGroupPhase(Phase):
    """Token handlers for the "in column group" phase."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-column
    __slots__ = tuple()

    def ignoreEndTagColgroup(self):
        """Return True when the current node is <html>."""
        currentNode = self.tree.openElements[-1]
        return currentNode.name == "html"

    def processEOF(self):
        """Handle EOF.

        With <html> as the current node (innerHTML case) nothing happens.
        Otherwise an implied </colgroup> is processed and True is returned
        unless that end tag was ignored.
        """
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
            return None
        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if wasIgnored:
            return None
        return True

    def processCharacters(self, token):
        """Imply </colgroup>; return the token unless that was ignored."""
        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if wasIgnored else token

    def startTagCol(self, token):
        """Insert a <col>, pop it at once and acknowledge self-closing."""
        tree = self.tree
        tree.insertElement(token)
        tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagOther(self, token):
        """Imply </colgroup>; return the token unless that was ignored."""
        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if wasIgnored else token

    def endTagColgroup(self, token):
        """Pop the current node and switch to the "inTable" phase.

        When the end tag has to be ignored (innerHTML case) only a parse
        error is reported.
        """
        if not self.ignoreEndTagColgroup():
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases["inTable"]
            return
        # innerHTML case
        assert self.parser.innerHTML
        self.parser.parseError()

    def endTagCol(self, token):
        """Report "no-end-tag" for </col>."""
        self.parser.parseError("no-end-tag", {"name": "col"})

    def endTagOther(self, token):
        """Imply </colgroup>; return the token unless that was ignored."""
        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if wasIgnored else token

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("col", startTagCol),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("colgroup", endTagColgroup),
        ("col", endTagCol),
    ])
    endTagHandler.default = endTagOther

2023

class InTableBodyPhase(Phase):
    """Token handlers for the "in table body" phase."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
    __slots__ = tuple()

    # helper methods
    def clearStackToTableBodyContext(self):
        """Pop open elements until the current node is a tbody, tfoot,
        thead or html element.
        """
        stack = self.tree.openElements
        while stack[-1].name not in ("tbody", "tfoot", "thead", "html"):
            # self.parser.parseError("unexpected-implied-end-tag-in-table",
            #  {"name": self.tree.openElements[-1].name})
            stack.pop()
        if stack[-1].name == "html":
            assert self.parser.innerHTML

    # the rest
    def processEOF(self):
        """Delegate EOF to the "inTable" phase (its result is not returned)."""
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        """Delegate whitespace tokens to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate character tokens to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processCharacters(token)

    def startTagTr(self, token):
        """Insert a <tr> and switch to the "inRow" phase."""
        self.clearStackToTableBodyContext()
        self.tree.insertElement(token)
        parser = self.parser
        parser.phase = parser.phases["inRow"]

    def startTagTableCell(self, token):
        """Handle <td>/<th> directly in a row group: report the error, open
        an implied <tr> and return the token for reprocessing.
        """
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-cell-in-table-body", errorData)
        self.startTagTr(impliedTagToken("tr", "StartTag"))
        return token

    def startTagTableOther(self, token):
        """Close the current row group and return the token.

        When no tbody/thead/tfoot is in table scope (innerHTML case) only a
        parse error is reported.
        """
        # XXX AT Any ideas on how to share this with endTagTable?
        rowGroupInScope = any(
            self.tree.elementInScope(groupName, variant="table")
            for groupName in ("tbody", "thead", "tfoot"))
        if not rowGroupInScope:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processStartTag(token)

    def endTagTableRowGroup(self, token):
        """Close the named row group and switch to the "inTable" phase, or
        report an error when it is not in table scope.
        """
        groupName = token["name"]
        if not self.tree.elementInScope(groupName, variant="table"):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": groupName})
            return
        self.clearStackToTableBodyContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Close the current row group and return the token.

        When no tbody/thead/tfoot is in table scope (innerHTML case) only a
        parse error is reported.
        """
        rowGroupInScope = any(
            self.tree.elementInScope(groupName, variant="table")
            for groupName in ("tbody", "thead", "tfoot"))
        if not rowGroupInScope:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def endTagIgnore(self, token):
        """Report "unexpected-end-tag-in-table-body" and drop the token."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-table-body", errorData)

    def endTagOther(self, token):
        """Delegate any other end tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("tr", startTagTr),
        (("td", "th"), startTagTableCell),
        (
            ("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
            startTagTableOther,
        ),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
        ("table", endTagTable),
        (
            ("body", "caption", "col", "colgroup", "html", "td", "th",
             "tr"),
            endTagIgnore,
        ),
    ])
    endTagHandler.default = endTagOther

2121

class InRowPhase(Phase):
    """Token handlers for the "in row" phase."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-row
    __slots__ = tuple()

    # helper methods (XXX unify this with other table helper methods)
    def clearStackToTableRowContext(self):
        """Pop open elements until the current node is a tr or html
        element, reporting a parse error for each element popped.
        """
        stack = self.tree.openElements
        while True:
            topName = stack[-1].name
            if topName in ("tr", "html"):
                break
            self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                   {"name": topName})
            stack.pop()

    def ignoreEndTagTr(self):
        """Return True when no tr element is in table scope."""
        rowInScope = self.tree.elementInScope("tr", variant="table")
        return not rowInScope

    # the rest
    def processEOF(self):
        """Delegate EOF to the "inTable" phase (its result is not returned)."""
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        """Delegate whitespace tokens to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate character tokens to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processCharacters(token)

    def startTagTableCell(self, token):
        """Insert a cell, switch to the "inCell" phase and push a Marker
        onto the active formatting elements.
        """
        self.clearStackToTableRowContext()
        self.tree.insertElement(token)
        parser = self.parser
        parser.phase = parser.phases["inCell"]
        self.tree.activeFormattingElements.append(Marker)

    def startTagTableOther(self, token):
        """Imply </tr>; return the token unless that end tag was ignored."""
        wasIgnored = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # XXX how are we sure it's always ignored in the innerHTML case?
        return None if wasIgnored else token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processStartTag(token)

    def endTagTr(self, token):
        """Close the row and switch to the "inTableBody" phase.

        When no tr is in table scope (innerHTML case) only a parse error is
        reported.
        """
        if self.ignoreEndTagTr():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        self.clearStackToTableRowContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTableBody"]

    def endTagTable(self, token):
        """Imply </tr> for a </table> end tag."""
        wasIgnored = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # Reprocess the current tag if the tr end tag was not ignored
        # XXX how are we sure it's always ignored in the innerHTML case?
        return None if wasIgnored else token

    def endTagTableRowGroup(self, token):
        """Imply </tr> and return the token when the named row group is in
        table scope; otherwise report a parse error.
        """
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError()
            return None
        self.endTagTr(impliedTagToken("tr"))
        return token

    def endTagIgnore(self, token):
        """Report "unexpected-end-tag-in-table-row" and drop the token."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-table-row", errorData)

    def endTagOther(self, token):
        """Delegate any other end tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("td", "th"), startTagTableCell),
        (
            ("caption", "col", "colgroup", "tbody", "tfoot", "thead",
             "tr"),
            startTagTableOther,
        ),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("tr", endTagTr),
        ("table", endTagTable),
        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
        (
            ("body", "caption", "col", "colgroup", "html", "td", "th"),
            endTagIgnore,
        ),
    ])
    endTagHandler.default = endTagOther

2210

class InCellPhase(Phase):
    """Token handlers for the "in cell" phase."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
    __slots__ = tuple()

    # helper
    def closeCell(self):
        """Close the open cell: a td in table scope is preferred, otherwise
        a th in table scope; with neither, nothing happens.
        """
        for cellName in ("td", "th"):
            if self.tree.elementInScope(cellName, variant="table"):
                self.endTagTableCell(impliedTagToken(cellName))
                break

    # the rest
    def processEOF(self):
        """Delegate EOF to the "inBody" phase (its result is not returned)."""
        self.parser.phases["inBody"].processEOF()

    def processCharacters(self, token):
        """Delegate character tokens to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processCharacters(token)

    def startTagTableOther(self, token):
        """Close the current cell and return the token.

        When neither td nor th is in table scope (innerHTML case) only a
        parse error is reported.
        """
        cellInScope = (self.tree.elementInScope("td", variant="table") or
                       self.tree.elementInScope("th", variant="table"))
        if not cellInScope:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processStartTag(token)

    def endTagTableCell(self, token):
        """Close the named cell and switch to the "inRow" phase.

        Reports "unexpected-end-tag" when the cell is not in table scope,
        and "unexpected-cell-end-tag" when other elements have to be popped
        before the cell is reached.
        """
        cellName = token["name"]
        if not self.tree.elementInScope(cellName, variant="table"):
            self.parser.parseError("unexpected-end-tag", {"name": cellName})
            return

        self.tree.generateImpliedEndTags(cellName)
        stack = self.tree.openElements
        if stack[-1].name == cellName:
            stack.pop()
        else:
            self.parser.parseError("unexpected-cell-end-tag",
                                   {"name": cellName})
            # Pop up to and including the cell itself.
            while stack.pop().name != cellName:
                pass
        self.tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inRow"]

    def endTagIgnore(self, token):
        """Report "unexpected-end-tag" and drop the token."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", errorData)

    def endTagImply(self, token):
        """Close the cell and return the token when the named element is in
        table scope; otherwise report a parse error.
        """
        if not self.tree.elementInScope(token["name"], variant="table"):
            # sometimes innerHTML case
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def endTagOther(self, token):
        """Delegate any other end tag to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (
            ("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
             "thead", "tr"),
            startTagTableOther,
        ),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("td", "th"), endTagTableCell),
        (("body", "caption", "col", "colgroup", "html"), endTagIgnore),
        (("table", "tbody", "tfoot", "thead", "tr"), endTagImply),
    ])
    endTagHandler.default = endTagOther

2286

class InSelectPhase(Phase):
    """Token handlers for the "in select" phase."""
    __slots__ = tuple()

    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
    def processEOF(self):
        """Report "eof-in-select" unless the current node is <html>."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-select")

    def processCharacters(self, token):
        """Insert the text; a lone U+0000 token is dropped."""
        text = token["data"]
        if text != "\u0000":
            self.tree.insertText(text)

    def startTagOption(self, token):
        """Insert an <option>, closing a currently open one first."""
        # We need to imply </option> if <option> is the current node.
        stack = self.tree.openElements
        if stack[-1].name == "option":
            stack.pop()
        self.tree.insertElement(token)

    def startTagOptgroup(self, token):
        """Insert an <optgroup>, closing a current <option> and then a
        current <optgroup> first.
        """
        stack = self.tree.openElements
        for impliedName in ("option", "optgroup"):
            if stack[-1].name == impliedName:
                stack.pop()
        self.tree.insertElement(token)

    def startTagSelect(self, token):
        """Treat a nested <select> as </select> after reporting the error."""
        self.parser.parseError("unexpected-select-in-select")
        self.endTagSelect(impliedTagToken("select"))

    def startTagInput(self, token):
        """Handle a start tag that ends the select.

        After reporting the error, the select is closed and the token is
        returned when a select is in select scope.
        """
        self.parser.parseError("unexpected-input-in-select")
        if not self.tree.elementInScope("select", variant="select"):
            assert self.parser.innerHTML
            return None
        self.endTagSelect(impliedTagToken("select"))
        return token

    def startTagScript(self, token):
        """Delegate <script> to the "inHead" phase."""
        headPhase = self.parser.phases["inHead"]
        return headPhase.processStartTag(token)

    def startTagOther(self, token):
        """Report "unexpected-start-tag-in-select" and drop the token."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-in-select", errorData)

    def endTagOption(self, token):
        """Pop a current <option>, or report an error when the current node
        is something else.
        """
        stack = self.tree.openElements
        if stack[-1].name != "option":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "option"})
            return
        stack.pop()

    def endTagOptgroup(self, token):
        """Close the current <optgroup>, together with an <option> sitting
        directly on top of it; report an error when no optgroup is current.
        """
        stack = self.tree.openElements
        # </optgroup> implicitly closes <option>
        optionInsideOptgroup = (stack[-1].name == "option" and
                                stack[-2].name == "optgroup")
        if optionInsideOptgroup:
            stack.pop()
        # It also closes </optgroup>
        if stack[-1].name == "optgroup":
            stack.pop()
        # But nothing else
        else:
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "optgroup"})

    def endTagSelect(self, token):
        """Pop up to and including the select and reset the insertion mode.

        When no select is in select scope (innerHTML case) only a parse
        error is reported.
        """
        if not self.tree.elementInScope("select", variant="select"):
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        stack = self.tree.openElements
        while stack.pop().name != "select":
            pass
        self.parser.resetInsertionMode()

    def endTagOther(self, token):
        """Report "unexpected-end-tag-in-select" and drop the token."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-select", errorData)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("option", startTagOption),
        ("optgroup", startTagOptgroup),
        ("select", startTagSelect),
        (("input", "keygen", "textarea"), startTagInput),
        ("script", startTagScript),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("option", endTagOption),
        ("optgroup", endTagOptgroup),
        ("select", endTagSelect),
    ])
    endTagHandler.default = endTagOther

2385

class InSelectInTablePhase(Phase):
    """Token handlers for a select inside a table.

    Table-structure tags close the select; everything else is delegated to
    the "inSelect" phase.
    """
    __slots__ = tuple()

    def processEOF(self):
        """Delegate EOF to the "inSelect" phase (its result is not returned)."""
        self.parser.phases["inSelect"].processEOF()

    def processCharacters(self, token):
        """Delegate character tokens to the "inSelect" phase."""
        selectPhase = self.parser.phases["inSelect"]
        return selectPhase.processCharacters(token)

    def startTagTable(self, token):
        """Report the error, imply </select> and return the token."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", errorData)
        self.endTagOther(impliedTagToken("select"))
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inSelect" phase."""
        selectPhase = self.parser.phases["inSelect"]
        return selectPhase.processStartTag(token)

    def endTagTable(self, token):
        """Report the error; when the named element is in table scope,
        imply </select> and return the token.
        """
        tagName = token["name"]
        self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": tagName})
        if not self.tree.elementInScope(token["name"], variant="table"):
            return None
        self.endTagOther(impliedTagToken("select"))
        return token

    def endTagOther(self, token):
        """Delegate any other end tag to the "inSelect" phase."""
        selectPhase = self.parser.phases["inSelect"]
        return selectPhase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        (
            ("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
            startTagTable,
        ),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (
            ("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
            endTagTable,
        ),
    ])
    endTagHandler.default = endTagOther

2423

class InForeignContentPhase(Phase):
    """Token handlers used while the current node is in a foreign
    (MathML or SVG) namespace.
    """
    __slots__ = tuple()

    # HTML start tags that break out of foreign content.
    breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                  "center", "code", "dd", "div", "dl", "dt",
                                  "em", "embed", "h1", "h2", "h3",
                                  "h4", "h5", "h6", "head", "hr", "i", "img",
                                  "li", "listing", "menu", "meta", "nobr",
                                  "ol", "p", "pre", "ruby", "s", "small",
                                  "span", "strong", "strike", "sub", "sup",
                                  "table", "tt", "u", "ul", "var"])

    # Lower-cased SVG tag name -> mixed-case spelling. Built once at class
    # creation instead of on every adjustSVGTagNames() call.
    svgTagNameReplacements = {"altglyph": "altGlyph",
                              "altglyphdef": "altGlyphDef",
                              "altglyphitem": "altGlyphItem",
                              "animatecolor": "animateColor",
                              "animatemotion": "animateMotion",
                              "animatetransform": "animateTransform",
                              "clippath": "clipPath",
                              "feblend": "feBlend",
                              "fecolormatrix": "feColorMatrix",
                              "fecomponenttransfer": "feComponentTransfer",
                              "fecomposite": "feComposite",
                              "feconvolvematrix": "feConvolveMatrix",
                              "fediffuselighting": "feDiffuseLighting",
                              "fedisplacementmap": "feDisplacementMap",
                              "fedistantlight": "feDistantLight",
                              "feflood": "feFlood",
                              "fefunca": "feFuncA",
                              "fefuncb": "feFuncB",
                              "fefuncg": "feFuncG",
                              "fefuncr": "feFuncR",
                              "fegaussianblur": "feGaussianBlur",
                              "feimage": "feImage",
                              "femerge": "feMerge",
                              "femergenode": "feMergeNode",
                              "femorphology": "feMorphology",
                              "feoffset": "feOffset",
                              "fepointlight": "fePointLight",
                              "fespecularlighting": "feSpecularLighting",
                              "fespotlight": "feSpotLight",
                              "fetile": "feTile",
                              "feturbulence": "feTurbulence",
                              "foreignobject": "foreignObject",
                              "glyphref": "glyphRef",
                              "lineargradient": "linearGradient",
                              "radialgradient": "radialGradient",
                              "textpath": "textPath"}

    def adjustSVGTagNames(self, token):
        """Rewrite ``token["name"]`` in place to its mixed-case SVG spelling.

        Names without an entry in ``svgTagNameReplacements`` are left
        untouched.
        """
        replacement = self.svgTagNameReplacements.get(token["name"])
        if replacement is not None:
            token["name"] = replacement

    def processCharacters(self, token):
        """Process a character token.

        A lone U+0000 token is replaced by U+FFFD. Otherwise any non-space
        character clears ``parser.framesetOK``. The token is then handed to
        ``Phase.processCharacters``.
        """
        if token["data"] == "\u0000":
            token["data"] = "\uFFFD"
        elif (self.parser.framesetOK and
              any(char not in spaceCharacters for char in token["data"])):
            self.parser.framesetOK = False
        Phase.processCharacters(self, token)

    def processStartTag(self, token):
        """Process a start tag.

        A breakout element (or a <font> carrying a color, face or size
        attribute) is a parse error: foreign elements are popped until the
        current node is in the default namespace or is an integration
        point, and the token is returned for reprocessing. Any other tag is
        adjusted for the current node's namespace and inserted; a
        self-closing tag is popped again and acknowledged.
        """
        currentNode = self.tree.openElements[-1]
        if (token["name"] in self.breakoutElements or
            (token["name"] == "font" and
             set(token["data"].keys()) & {"color", "face", "size"})):
            self.parser.parseError("unexpected-html-element-in-foreign-content",
                                   {"name": token["name"]})
            while (self.tree.openElements[-1].namespace !=
                   self.tree.defaultNamespace and
                   not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                   not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                self.tree.openElements.pop()
            return token

        else:
            if currentNode.namespace == namespaces["mathml"]:
                self.parser.adjustMathMLAttributes(token)
            elif currentNode.namespace == namespaces["svg"]:
                self.adjustSVGTagNames(token)
                self.parser.adjustSVGAttributes(token)
            self.parser.adjustForeignAttributes(token)
            # The new element inherits the namespace of the current node.
            token["namespace"] = currentNode.namespace
            self.tree.insertElement(token)
            if token["selfClosing"]:
                self.tree.openElements.pop()
                token["selfClosingAcknowledged"] = True

    def processEndTag(self, token):
        """Process an end tag.

        Walks down the stack of open elements from the current node. When a
        node whose ASCII-lower-cased name matches the token is found,
        everything up to and including it is popped and None is returned.
        When a node in the default namespace is reached first, the token is
        handed to the parser's current phase and that result is returned.
        """
        nodeIndex = len(self.tree.openElements) - 1
        node = self.tree.openElements[-1]
        if node.name.translate(asciiUpper2Lower) != token["name"]:
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        while True:
            if node.name.translate(asciiUpper2Lower) == token["name"]:
                # XXX this isn't in the spec but it seems necessary
                if self.parser.phase == self.parser.phases["inTableText"]:
                    self.parser.phase.flushCharacters()
                    self.parser.phase = self.parser.phase.originalPhase
                # Pop up to and including the matching node.
                while self.tree.openElements.pop() != node:
                    assert self.tree.openElements
                new_token = None
                break
            nodeIndex -= 1

            node = self.tree.openElements[nodeIndex]
            if node.namespace != self.tree.defaultNamespace:
                continue
            else:
                new_token = self.parser.phase.processEndTag(token)
                break
        return new_token

2537

class AfterBodyPhase(Phase):
    """Token handlers for the "after body" phase."""
    __slots__ = tuple()

    def processEOF(self):
        """Do nothing at EOF."""
        # Stop parsing
        pass

    def processComment(self, token):
        """Insert the comment as a child of the first open element."""
        # This is needed because data is to be appended to the <html> element
        # here and not to whatever is currently open.
        rootElement = self.tree.openElements[0]
        self.tree.insertComment(token, rootElement)

    def processCharacters(self, token):
        """Report the error, switch to "inBody" and return the token."""
        parser = self.parser
        parser.parseError("unexpected-char-after-body")
        parser.phase = parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Delegate <html> to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processStartTag(token)

    def startTagOther(self, token):
        """Report the error, switch to "inBody" and return the token."""
        parser = self.parser
        parser.parseError("unexpected-start-tag-after-body",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token

    def endTagHtml(self, name):
        """Handle </html>: a parse error in innerHTML mode, otherwise a
        switch to the "afterAfterBody" phase.
        """
        parser = self.parser
        if not parser.innerHTML:
            parser.phase = parser.phases["afterAfterBody"]
        else:
            parser.parseError("unexpected-end-tag-after-body-innerhtml")

    def endTagOther(self, token):
        """Report the error, switch to "inBody" and return the token."""
        parser = self.parser
        parser.parseError("unexpected-end-tag-after-body",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("html", endTagHtml),
    ])
    endTagHandler.default = endTagOther

2583

class InFramesetPhase(Phase):
    """Token handlers for the "in frameset" phase."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
    __slots__ = tuple()

    def processEOF(self):
        """Report "eof-in-frameset" unless the current node is <html>."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-frameset")

    def processCharacters(self, token):
        """Report "unexpected-char-in-frameset" and drop the token."""
        self.parser.parseError("unexpected-char-in-frameset")

    def startTagFrameset(self, token):
        """Insert a nested <frameset>."""
        self.tree.insertElement(token)

    def startTagFrame(self, token):
        """Insert a <frame> and pop it again at once."""
        tree = self.tree
        tree.insertElement(token)
        tree.openElements.pop()

    def startTagNoframes(self, token):
        """Delegate <noframes> to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processStartTag(token)

    def startTagOther(self, token):
        """Report "unexpected-start-tag-in-frameset" and drop the token."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-in-frameset", errorData)

    def endTagFrameset(self, token):
        """Handle </frameset>.

        With <html> as the current node a parse error is reported;
        otherwise the current node is popped. Afterwards, outside innerHTML
        mode, the parser moves to "afterFrameset" once the current node is
        no longer a frameset.
        """
        stack = self.tree.openElements
        if stack[-1].name != "html":
            stack.pop()
        else:
            # innerHTML case
            self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
        if (not self.parser.innerHTML and
                stack[-1].name != "frameset"):
            # If we're not in innerHTML mode and the current node is not a
            # "frameset" element (anymore) then switch.
            self.parser.phase = self.parser.phases["afterFrameset"]

    def endTagOther(self, token):
        """Report "unexpected-end-tag-in-frameset" and drop the token."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-frameset", errorData)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("frameset", startTagFrameset),
        ("frame", startTagFrame),
        ("noframes", startTagNoframes),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("frameset", endTagFrameset),
    ])
    endTagHandler.default = endTagOther

2639

class AfterFramesetPhase(Phase):
    """Token handlers for the "after frameset" phase."""
    # http://www.whatwg.org/specs/web-apps/current-work/#after3
    __slots__ = tuple()

    def processEOF(self):
        """Do nothing at EOF."""
        # Stop parsing
        pass

    def processCharacters(self, token):
        """Report "unexpected-char-after-frameset" and drop the token."""
        self.parser.parseError("unexpected-char-after-frameset")

    def startTagNoframes(self, token):
        """Delegate <noframes> to the "inHead" phase."""
        headPhase = self.parser.phases["inHead"]
        return headPhase.processStartTag(token)

    def startTagOther(self, token):
        """Report "unexpected-start-tag-after-frameset" and drop the token."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-after-frameset",
                               errorData)

    def endTagHtml(self, token):
        """Switch to the "afterAfterFrameset" phase."""
        parser = self.parser
        parser.phase = parser.phases["afterAfterFrameset"]

    def endTagOther(self, token):
        """Report "unexpected-end-tag-after-frameset" and drop the token."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-after-frameset",
                               errorData)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("noframes", startTagNoframes),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("html", endTagHtml),
    ])
    endTagHandler.default = endTagOther

2675

class AfterAfterBodyPhase(Phase):
    """Phase registered under the "afterAfterBody" key.

    Comments are attached to the document and whitespace is delegated to
    the "inBody" phase. Any other character data or tag is a parse error
    that switches the parser back to "inBody" and returns the token.
    """
    __slots__ = ()

    def processEOF(self):
        """Handle end of input: nothing is left to do here."""

    def processComment(self, token):
        """Insert the comment with the document itself as its parent."""
        tree = self.tree
        tree.insertComment(token, tree.document)

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report unexpected text, fall back to "inBody", return the token.

        NOTE(review): the returned token is presumably reprocessed by the
        caller in the new phase -- confirm against the main loop.
        """
        parser = self.parser
        parser.parseError("expected-eof-but-got-char")
        parser.phase = parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Delegate an ``<html>`` start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagOther(self, token):
        """Report the start tag, fall back to "inBody", return the token."""
        parser = self.parser
        parser.parseError("expected-eof-but-got-start-tag",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token

    def processEndTag(self, token):
        """Report the end tag, fall back to "inBody", return the token."""
        parser = self.parser
        parser.parseError("expected-eof-but-got-end-tag",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token

    # Only ``<html>`` has a dedicated start tag handler.
    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
    ])
    startTagHandler.default = startTagOther

2712

class AfterAfterFramesetPhase(Phase):
    """Phase registered under the "afterAfterFrameset" key.

    Comments are attached to the document, whitespace and ``<html>`` are
    delegated to the "inBody" phase, and ``<noframes>`` to "inHead".
    Everything else is reported as a parse error without changing phase.
    """
    __slots__ = ()

    def processEOF(self):
        """Handle end of input: nothing is left to do here."""

    def processComment(self, token):
        """Insert the comment with the document itself as its parent."""
        tree = self.tree
        tree.insertComment(token, tree.document)

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report unexpected character data as a parse error."""
        parser = self.parser
        parser.parseError("expected-eof-but-got-char")

    def startTagHtml(self, token):
        """Delegate an ``<html>`` start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagNoFrames(self, token):
        """Delegate a ``<noframes>`` start tag to the "inHead" phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error."""
        parser = self.parser
        parser.parseError("expected-eof-but-got-start-tag",
                          {"name": token["name"]})

    def processEndTag(self, token):
        """Report every end tag as a parse error."""
        parser = self.parser
        parser.parseError("expected-eof-but-got-end-tag",
                          {"name": token["name"]})

    # Tag name -> handler; unlisted names use the ``default`` handler.
    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("noframes", startTagNoFrames),
    ])
    startTagHandler.default = startTagOther

2747

2748 # pylint:enable=unused-argument

2749

2750 return {

2751 "initial": InitialPhase,

2752 "beforeHtml": BeforeHtmlPhase,

2753 "beforeHead": BeforeHeadPhase,

2754 "inHead": InHeadPhase,

2755 "inHeadNoscript": InHeadNoscriptPhase,

2756 "afterHead": AfterHeadPhase,

2757 "inBody": InBodyPhase,

2758 "text": TextPhase,

2759 "inTable": InTablePhase,

2760 "inTableText": InTableTextPhase,

2761 "inCaption": InCaptionPhase,

2762 "inColumnGroup": InColumnGroupPhase,

2763 "inTableBody": InTableBodyPhase,

2764 "inRow": InRowPhase,

2765 "inCell": InCellPhase,

2766 "inSelect": InSelectPhase,

2767 "inSelectInTable": InSelectInTablePhase,

2768 "inForeignContent": InForeignContentPhase,

2769 "afterBody": AfterBodyPhase,

2770 "inFrameset": InFramesetPhase,

2771 "afterFrameset": AfterFramesetPhase,

2772 "afterAfterBody": AfterAfterBodyPhase,

2773 "afterAfterFrameset": AfterAfterFramesetPhase,

2774 # XXX after after frameset

2775 }

2776

2777

def adjust_attributes(token, replacements):
    """Rename attribute keys of ``token['data']`` in place.

    :arg token: token dict whose ``'data'`` entry is a mapping of
        attribute name to value

    :arg replacements: mapping of attribute name to its replacement name

    ``token['data']`` is rebuilt (as the same mapping type) only when at
    least one attribute name appears in ``replacements``; otherwise the
    token is left untouched.
    """
    attrs = token['data']
    if not (viewkeys(attrs) & viewkeys(replacements)):
        return
    renamed = ((replacements.get(name, name), value)
               for name, value in attrs.items())
    token['data'] = type(attrs)(renamed)

2783

2784

def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    """Build a tag token dict for a tag that is not present in the input.

    :arg name: the tag name

    :arg type: key into ``tokenTypes`` naming the token type

    :arg attributes: attribute mapping stored as the token's ``"data"``;
        a new empty dict is used when this is ``None``

    :arg selfClosing: value stored under ``"selfClosing"``

    :returns: dict with the keys ``"type"``, ``"name"``, ``"data"`` and
        ``"selfClosing"``
    """
    token = {
        "type": tokenTypes[type],
        "name": name,
        "data": {} if attributes is None else attributes,
        "selfClosing": selfClosing,
    }
    return token

2791

2792

class ParseError(Exception):
    """Exception type representing an error in a parsed document."""

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bleach/_vendor/html5lib/html5parser.py: 2%

1533 statements