Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bleach/_vendor/html5lib/treewalkers/etree.py: 92%

79 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:27 +0000

1from __future__ import absolute_import, division, unicode_literals 

2 

3from collections import OrderedDict 

4import re 

5 

6from six import string_types 

7 

8from . import base 

9from .._utils import moduleFactoryFactory 

10 

# Splits a qualified name written as "{namespace}local" into its two parts:
# group 1 is the text between the braces (the namespace) and group 2 is
# everything after the closing brace (the local name).  Names that do not
# start with a "{...}" prefix do not match at all.
tag_regexp = re.compile(r"{([^}]*)}(.*)")

12 

13 

def getETreeBuilder(ElementTreeImplementation):
    """Create the tree-walker definitions for one ElementTree implementation.

    :arg ElementTreeImplementation: an object exposing the ElementTree API;
        only its ``Comment`` factory is used directly here.

    :returns: this function's ``locals()`` mapping, i.e. the names
        ``ElementTreeImplementation``, ``ElementTree``,
        ``ElementTreeCommentType`` and ``TreeWalker``.  Do not introduce
        additional function-level names without considering that they would
        show up in the returned mapping as well.
    """
    ElementTree = ElementTreeImplementation
    # Comment nodes are recognised by comparing their ``tag`` with the tag of
    # a throw-away comment created by the same implementation.
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class TreeWalker(base.NonRecursiveTreeWalker):  # pylint:disable=unused-variable
        """Non-recursive walker over an ElementTree structure.

        To avoid recursion, a "node" handed to or returned from the methods
        below is either a bare element (used for the root) or a 4-tuple:

        1. The current element

        2. The index of the element relative to its parent

        3. A stack (list) of ancestor elements; the same list object is
           shared between the tuples and is mutated while walking

        4. A flag, "text", "tail" or None, telling whether the node stands
           for a text node -- the text or the tail of the element in (1) --
           or for the element itself
        """

        def getNodeDetails(self, node):
            """Describe ``node`` as a tuple starting with a ``base`` type constant.

            Text nodes yield ``(base.TEXT, data)``; the document root or a
            fragment yields ``(base.DOCUMENT,)``; doctypes, comments and
            ordinary elements yield the longer tuples built below.
            """
            if isinstance(node, tuple):  # It might be the root Element
                element, _index, _ancestors, kind = node
                if kind in ("text", "tail"):
                    # The flag doubles as the attribute name holding the data.
                    return base.TEXT, getattr(element, kind)
                node = element

            # Objects without a ``tag`` are presumably tree wrappers exposing
            # ``getroot()`` rather than elements -- confirm against callers.
            if not hasattr(node, "tag"):
                node = node.getroot()

            if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
                return (base.DOCUMENT,)

            if node.tag == "<!DOCTYPE>":
                return (base.DOCTYPE, node.text,
                        node.get("publicId"), node.get("systemId"))

            if node.tag == ElementTreeCommentType:
                return base.COMMENT, node.text

            assert isinstance(node.tag, string_types), type(node.tag)
            # This is assumed to be an ordinary element
            qualified = tag_regexp.match(node.tag)
            namespace, tag = qualified.groups() if qualified else (None, node.tag)

            # Attribute keys become (namespace, local name) pairs, keeping the
            # order in which the element reports them.
            attrs = OrderedDict()
            for name, value in node.attrib.items():
                qualified = tag_regexp.match(name)
                if qualified:
                    attr_key = (qualified.group(1), qualified.group(2))
                else:
                    attr_key = (None, name)
                attrs[attr_key] = value

            # Last item is truthy when the element has children or text.
            return (base.ELEMENT, namespace, tag,
                    attrs, len(node) or node.text)

        def getFirstChild(self, node):
            """Return the first child node of ``node``, or None if it has none.

            An element's text, when non-empty, comes before its first child
            element.  Text and tail nodes never have children.
            """
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                # A bare element is the root: no index, fresh ancestor stack.
                element, key, parents, flag = node, None, [], None

            if flag in ("text", "tail"):
                return None
            if element.text:
                return element, key, parents, "text"
            if len(element):
                # Descending: the current element becomes the innermost ancestor.
                parents.append(element)
                return element[0], 0, parents, None
            return None

        def getNextSibling(self, node):
            """Return the node following ``node`` at the same level, or None.

            A bare (non-tuple) node has no siblings.
            """
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # The element's text is followed by its first child element.
                if len(element):
                    parents.append(element)
                    return element[0], 0, parents, None
                return None

            # An element is followed by its own tail text, if any ...
            if element.tail and flag != "tail":
                return element, key, parents, "tail"
            # ... and then by the next element under the same parent.
            following = key + 1
            if following < len(parents[-1]):
                return parents[-1][following], following, parents, None
            return None

        def getParentNode(self, node):
            """Return the parent of ``node``, or None for a bare (non-tuple) node."""
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # The parent of a text node is the element that owns the text;
                # with an empty ancestor stack that element is returned bare.
                if not parents:
                    return element
                return element, key, parents, None

            # Ascending: drop the innermost ancestor from the shared stack.
            parent = parents.pop()
            if not parents:
                return parent
            siblings = list(parents[-1])
            assert siblings.count(parent) == 1
            return parent, siblings.index(parent), parents, None

    return locals()

129 

130 

# Public entry point of this module: the builder above wrapped by
# moduleFactoryFactory (imported from .._utils).
getETreeModule = moduleFactoryFactory(getETreeBuilder)