Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/bs4/_typing.py: 95%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

37 statements  

1# Custom type aliases used throughout Beautiful Soup to improve readability. 

2 

3# Notes on improvements to the type system in newer versions of Python 

4# that can be used once Beautiful Soup drops support for older 

5# versions: 

6# 

7# * ClassVar can be put on class variables now. 

8# * In 3.10, x|y is an accepted shorthand for Union[x,y]. 

9# * In 3.10, TypeAlias gains capabilities that can be used to 

10# improve the tree matching types (I don't remember what, exactly). 

11# * In 3.9 it's possible to specialize the re.Match type, 

12# e.g. re.Match[str]. In 3.8 there's a typing.re namespace for this, 

13# but it's removed in 3.13, so to support the widest possible set of 

14# versions I'm not using it. 

15 

16from typing_extensions import ( 

17 runtime_checkable, 

18 Protocol, 

19 TypeAlias, 

20) 

21from typing import ( 

22 Any, 

23 Callable, 

24 Dict, 

25 IO, 

26 Iterable, 

27 Mapping, 

28 Optional, 

29 Pattern, 

30 TYPE_CHECKING, 

31 Union, 

32) 

33 

34if TYPE_CHECKING: 

35 from bs4.element import ( 

36 AttributeValueList, 

37 NamespacedAttribute, 

38 NavigableString, 

39 PageElement, 

40 ResultSet, 

41 Tag, 

42 ) 

43 

44 

45@runtime_checkable 

46class _RegularExpressionProtocol(Protocol): 

47 """A protocol object which can accept either Python's built-in 

48 `re.Pattern` objects, or the similar ``Regex`` objects defined by the 

49 third-party ``regex`` package. 

50 """ 

51 

52 def search( 

53 self, string: str, pos: int = ..., endpos: int = ... 

54 ) -> Optional[Any]: ... 

55 

56 @property 

57 def pattern(self) -> str: ... 

58 

59 

60# Aliases for markup in various stages of processing. 

61# 

62#: The rawest form of markup: either a string, bytestring, or an open filehandle. 

63_IncomingMarkup: TypeAlias = Union[str, bytes, IO[str], IO[bytes]] 

64 

65#: Markup that is in memory but has (potentially) yet to be converted 

66#: to Unicode. 

67_RawMarkup: TypeAlias = Union[str, bytes] 

68 

69# Aliases for character encodings 

70# 

71 

72#: A data encoding. 

73_Encoding: TypeAlias = str 

74 

75#: One or more data encodings. 

76_Encodings: TypeAlias = Iterable[_Encoding] 

77 

78# Aliases for XML namespaces 

79# 

80 

81#: The prefix for an XML namespace. 

82_NamespacePrefix: TypeAlias = str 

83 

84#: The URL of an XML namespace 

85_NamespaceURL: TypeAlias = str 

86 

87#: A mapping of prefixes to namespace URLs. 

88_NamespaceMapping: TypeAlias = Dict[_NamespacePrefix, _NamespaceURL] 

89 

90#: A mapping of namespace URLs to prefixes 

91_InvertedNamespaceMapping: TypeAlias = Dict[_NamespaceURL, _NamespacePrefix] 

92 

93# Aliases for the attribute values associated with HTML/XML tags. 

94# 

95 

96#: The value associated with an HTML or XML attribute. This is the 

97#: relatively unprocessed value Beautiful Soup expects to come from a 

98#: `TreeBuilder`. 

99_RawAttributeValue: TypeAlias = str 

100 

101#: A dictionary of names to `_RawAttributeValue` objects. This is how 

102#: Beautiful Soup expects a `TreeBuilder` to represent a tag's 

103#: attribute values. 

104_RawAttributeValues: TypeAlias = ( 

105 "Mapping[Union[str, NamespacedAttribute], _RawAttributeValue]" 

106) 

107 

108#: An attribute value in its final form, as stored in the 

109#: `Tag` class, after it has been processed and (in some cases) 

110#: split into a list of strings. 

111_AttributeValue: TypeAlias = Union[str, "AttributeValueList"] 

112 

113#: A dictionary of names to :py:data:`_AttributeValue` objects. This is what 

114#: a tag's attributes look like after processing. 

115_AttributeValues: TypeAlias = Dict[str, _AttributeValue] 

116 

117#: The methods that deal with turning :py:data:`_RawAttributeValue` into 

118#: :py:data:`_AttributeValue` may be called several times, even after the values 

119#: are already processed (e.g. when cloning a tag), so they need to 

120#: be able to accommodate both possibilities. 

121_RawOrProcessedAttributeValues: TypeAlias = Union[_RawAttributeValues, _AttributeValues] 

122 

123#: A number of tree manipulation methods can take either a `PageElement` or a 

124#: normal Python string (which will be converted to a `NavigableString`). 

125_InsertableElement: TypeAlias = Union["PageElement", str] 

126 

127# Aliases to represent the many possibilities for matching bits of a 

128# parse tree. 

129# 

130# This is very complicated because we're applying a formal type system 

131# to some very DWIM code. The types we end up with will be the types 

132# of the arguments to the SoupStrainer constructor and (more 

133# familiarly to Beautiful Soup users) the find* methods. 

134 

135#: A function that takes a PageElement and returns a yes-or-no answer. 

136_PageElementMatchFunction: TypeAlias = Callable[["PageElement"], bool] 

137 

138#: A function that takes the raw parsed ingredients of a markup tag 

139#: and returns a yes-or-no answer. 

140# Not necessary at the moment. 

141# _AllowTagCreationFunction:TypeAlias = Callable[[Optional[str], str, Optional[_RawAttributeValues]], bool] 

142 

143#: A function that takes the raw parsed ingredients of a markup string node 

144#: and returns a yes-or-no answer. 

145# Not necessary at the moment. 

146# _AllowStringCreationFunction:TypeAlias = Callable[[Optional[str]], bool] 

147 

148#: A function that takes a `Tag` and returns a yes-or-no answer. 

149#: A `TagNameMatchRule` expects this kind of function, if you're 

150#: going to pass it a function. 

151_TagMatchFunction: TypeAlias = Callable[["Tag"], bool] 

152 

153#: A function that takes a single string and returns a yes-or-no 

154#: answer. An `AttributeValueMatchRule` expects this kind of function, if 

155#: you're going to pass it a function. So does a `StringMatchRule`. 

156_StringMatchFunction: TypeAlias = Callable[[str], bool] 

157 

158#: Either a tag name, an attribute value or a string can be matched 

159#: against a string, bytestring, regular expression, or a boolean. 

160_BaseStrainable: TypeAlias = Union[str, bytes, Pattern[str], bool] 

161 

162#: A tag can be matched either with the `_BaseStrainable` options, or 

163#: using a function that takes the `Tag` as its sole argument. 

164_BaseStrainableElement: TypeAlias = Union[_BaseStrainable, _TagMatchFunction] 

165 

166#: A tag's attribute value can be matched either with the 

167#: `_BaseStrainable` options, or using a function that takes that 

168#: value as its sole argument. 

169_BaseStrainableAttribute: TypeAlias = Union[_BaseStrainable, _StringMatchFunction] 

170 

171#: A tag can be matched using either a single criterion or a list of 

172#: criteria. 

173_StrainableElement: TypeAlias = Union[ 

174 _BaseStrainableElement, Iterable[_BaseStrainableElement] 

175] 

176 

177#: An attribute value can be matched using either a single criterion 

178#: or a list of criteria. 

179_StrainableAttribute: TypeAlias = Union[ 

180 _BaseStrainableAttribute, Iterable[_BaseStrainableAttribute] 

181] 

182 

183#: A string can be matched using the same techniques as 

184#: an attribute value. 

185_StrainableString: TypeAlias = _StrainableAttribute 

186 

187#: A dictionary may be used to match against multiple attribute values at once. 

188_StrainableAttributes: TypeAlias = Dict[str, _StrainableAttribute] 

189 

190#: Many Beautiful Soup methods return a PageElement or a ResultSet of 

191#: PageElements. A PageElement is either a Tag or a NavigableString. 

192#: These convenience aliases make it easier for IDE users to see which methods 

193#: are available on the objects they're dealing with. 

194_OneElement: TypeAlias = Union["PageElement", "Tag", "NavigableString"] 

195_AtMostOneElement: TypeAlias = Optional[_OneElement] 

196_QueryResults: TypeAlias = "ResultSet[_OneElement]"