Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bleach/_vendor/html5lib/treewalkers/__init__.py

1"""A collection of modules for iterating through different kinds of

2tree, generating tokens identical to those produced by the tokenizer

3module.

5To create a tree walker for a new type of tree, you need to

6implement a tree walker object (called TreeWalker by convention) that

7implements a 'serialize' method which takes a tree as sole argument and

8returns an iterator which generates tokens.

9"""

11from __future__ import absolute_import, division, unicode_literals

13from .. import constants

14from .._utils import default_etree

# Names exported when this package is star-imported.
16__all__ = ["getTreeWalker", "pprint"]

# Maps a lowercased tree type name to its TreeWalker class. getTreeWalker
# fills it in lazily for every tree type except "etree", which is returned
# without being stored here.
18treeWalkerCache = {}

def getTreeWalker(treeType, implementation=None, **kwargs):
    """Look up the TreeWalker class for a named kind of tree

    :arg str treeType: the name of the tree type required (case-insensitive).
        Supported values are:

        * "dom": The xml.dom.minidom DOM implementation
        * "etree": A generic walker for tree implementations exposing an
          elementtree-like interface (known to work with ElementTree,
          cElementTree and lxml.etree).
        * "lxml": Optimized walker for lxml.etree
        * "genshi": a Genshi stream

    :arg implementation: A module implementing the tree type e.g.
        xml.etree.ElementTree or cElementTree (Currently applies to the
        "etree" tree type only). When omitted, ``default_etree`` is used.

    :arg kwargs: keyword arguments passed to the etree walker--for other
        walkers, this has no effect

    :returns: a TreeWalker class, or ``None`` if ``treeType`` is not one of
        the supported values

    """
    key = treeType.lower()

    # Fast path: a walker class for this tree type was already resolved.
    if key in treeWalkerCache:
        return treeWalkerCache[key]

    if key == "etree":
        from . import etree
        if implementation is None:
            implementation = default_etree
        # XXX: NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(implementation, **kwargs).TreeWalker

    # The remaining walkers are imported on first use and then remembered.
    if key == "dom":
        from . import dom
        walkerClass = dom.TreeWalker
    elif key == "genshi":
        from . import genshi
        walkerClass = genshi.TreeWalker
    elif key == "lxml":
        from . import etree_lxml
        walkerClass = etree_lxml.TreeWalker
    else:
        # Unknown tree type: nothing is cached and the caller gets None.
        return None

    treeWalkerCache[key] = walkerClass
    return walkerClass

def concatenateCharacterTokens(tokens):
    """Merge runs of adjacent character tokens into single tokens

    Consecutive "Characters" and "SpaceCharacters" tokens are collapsed into
    one ``{"type": "Characters", "data": ...}`` token whose data is the
    concatenation of the run. Every other token is yielded unchanged and in
    its original position.

    :arg tokens: an iterable of token dicts; each has a "type" key and
        character tokens also have a "data" key

    :returns: a generator of token dicts

    """
    pendingCharacters = []
    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            # Buffer the text; it is emitted once the run ends.
            pendingCharacters.append(token["data"])
            continue
        if pendingCharacters:
            yield {"type": "Characters", "data": "".join(pendingCharacters)}
            pendingCharacters = []
        yield token
    # Flush a run that reaches the end of the stream.
    if pendingCharacters:
        yield {"type": "Characters", "data": "".join(pendingCharacters)}

def pprint(walker):
    """Pretty printer for tree walkers

    Takes a TreeWalker instance and renders the tokens produced by walking
    the tree as indented text, one item per line. Adjacent character tokens
    are merged first. Nothing is written to stdout; the text is returned.

    :arg walker: a TreeWalker instance (it is only iterated, yielding token
        dicts)

    :returns: the formatted lines joined with newlines

    :raises ValueError: if a token has an unrecognised "type"

    """
    output = []
    indent = 0
    for token in concatenateCharacterTokens(walker):
        tokenType = token["type"]
        if tokenType in ("StartTag", "EmptyTag"):
            # tag name (prefixed with its namespace unless that is HTML)
            tagNamespace = token["namespace"]
            if tagNamespace and tagNamespace != constants.namespaces["html"]:
                if tagNamespace in constants.prefixes:
                    ns = constants.prefixes[tagNamespace]
                else:
                    ns = tagNamespace
                name = "%s %s" % (ns, token["name"])
            else:
                name = token["name"]
            output.append("%s<%s>" % (" " * indent, name))
            indent += 2
            # attributes (sorted for consistent ordering)
            attrs = token["data"]
            for (namespace, localname), value in sorted(attrs.items()):
                if namespace:
                    if namespace in constants.prefixes:
                        ns = constants.prefixes[namespace]
                    else:
                        ns = namespace
                    name = "%s %s" % (ns, localname)
                else:
                    name = localname
                output.append("%s%s=\"%s\"" % (" " * indent, name, value))
            # self-closing: no children follow, so undo the indent at once
            if tokenType == "EmptyTag":
                indent -= 2

        elif tokenType == "EndTag":
            indent -= 2

        elif tokenType == "Comment":
            # Two placeholders are required here: a bare "%s" with the
            # (indent, data) tuple raises TypeError.
            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))

        elif tokenType == "Doctype":
            if token["name"]:
                if token["publicId"]:
                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["publicId"],
                                   token["systemId"] if token["systemId"] else ""))
                elif token["systemId"]:
                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["systemId"]))
                else:
                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
                                                       token["name"]))
            else:
                output.append("%s<!DOCTYPE >" % (" " * indent,))

        elif tokenType == "Characters":
            output.append("%s\"%s\"" % (" " * indent, token["data"]))

        elif tokenType == "SpaceCharacters":
            assert False, "concatenateCharacterTokens should have got rid of all Space tokens"

        else:
            raise ValueError("Unknown token type, %s" % tokenType)

    return "\n".join(output)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bleach/_vendor/html5lib/treewalkers/__init__.py: 10%

78 statements