Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/bs4/_typing.py: 95%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Custom type aliases used throughout Beautiful Soup to improve readability.
3# Notes on improvements to the type system in newer versions of Python
4# that can be used once Beautiful Soup drops support for older
5# versions:
6#
7# * ClassVar can be put on class variables now.
8# * In 3.10, x|y is an accepted shorthand for Union[x,y].
9# * In 3.10, TypeAlias gains capabilities that can be used to
10# improve the tree matching types (I don't remember what, exactly).
11# * In 3.9 it's possible to specialize the re.Match type,
12# e.g. re.Match[str]. In 3.8 there's a typing.re namespace for this,
13# but it's removed in 3.12, so to support the widest possible set of
14# versions I'm not using it.
16from typing_extensions import (
17 runtime_checkable,
18 Protocol,
19 TypeAlias,
20)
21from typing import (
22 Any,
23 Callable,
24 Dict,
25 IO,
26 Iterable,
27 Mapping,
28 Optional,
29 Pattern,
30 TYPE_CHECKING,
31 Union,
32)
34if TYPE_CHECKING:
35 from bs4.element import (
36 AttributeValueList,
37 NamespacedAttribute,
38 NavigableString,
39 PageElement,
40 ResultSet,
41 Tag,
42 )
@runtime_checkable
class _RegularExpressionProtocol(Protocol):
    """Structural stand-in for a compiled regular expression.

    It is meant to accept both Python's built-in `re.Pattern` objects
    and the similar ``Regex`` objects defined by the third-party
    ``regex`` package.
    """

    @property
    def pattern(self) -> str:
        """Stub: a string-valued ``pattern`` attribute."""
        ...

    def search(self, string: str, pos: int = ..., endpos: int = ...) -> Optional[Any]:
        """Stub: takes a string plus optional ``pos``/``endpos`` integers."""
        ...
60# Aliases for markup in various stages of processing.
61#
62#: The rawest form of markup: either a string, bytestring, or an open filehandle.
63_IncomingMarkup: TypeAlias = Union[str, bytes, IO[str], IO[bytes]]
65#: Markup that is in memory but has (potentially) yet to be converted
66#: to Unicode.
67_RawMarkup: TypeAlias = Union[str, bytes]
69# Aliases for character encodings
70#
72#: A data encoding.
73_Encoding: TypeAlias = str
75#: One or more data encodings.
76_Encodings: TypeAlias = Iterable[_Encoding]
78# Aliases for XML namespaces
79#
81#: The prefix for an XML namespace.
82_NamespacePrefix: TypeAlias = str
84#: The URL of an XML namespace
85_NamespaceURL: TypeAlias = str
87#: A mapping of prefixes to namespace URLs.
88_NamespaceMapping: TypeAlias = Dict[_NamespacePrefix, _NamespaceURL]
90#: A mapping of namespace URLs to prefixes
91_InvertedNamespaceMapping: TypeAlias = Dict[_NamespaceURL, _NamespacePrefix]
93# Aliases for the attribute values associated with HTML/XML tags.
94#
96#: The value associated with an HTML or XML attribute. This is the
97#: relatively unprocessed value Beautiful Soup expects to come from a
98#: `TreeBuilder`.
99_RawAttributeValue: TypeAlias = str
101#: A dictionary of names to `_RawAttributeValue` objects. This is how
102#: Beautiful Soup expects a `TreeBuilder` to represent a tag's
103#: attribute values.
104_RawAttributeValues: TypeAlias = (
105 "Mapping[Union[str, NamespacedAttribute], _RawAttributeValue]"
106)
108#: An attribute value in its final form, as stored in the
109# `Tag` class, after it has been processed and (in some cases)
110# split into a list of strings.
111_AttributeValue: TypeAlias = Union[str, "AttributeValueList"]
113#: A dictionary of names to :py:data:`_AttributeValue` objects. This is what
114#: a tag's attributes look like after processing.
115_AttributeValues: TypeAlias = Dict[str, _AttributeValue]
117#: The methods that deal with turning :py:data:`_RawAttributeValue` into
118#: :py:data:`_AttributeValue` may be called several times, even after the values
119#: are already processed (e.g. when cloning a tag), so they need to
120#: be able to acommodate both possibilities.
121_RawOrProcessedAttributeValues: TypeAlias = Union[_RawAttributeValues, _AttributeValues]
123#: A number of tree manipulation methods can take either a `PageElement` or a
124#: normal Python string (which will be converted to a `NavigableString`).
125_InsertableElement: TypeAlias = Union["PageElement", str]
127# Aliases to represent the many possibilities for matching bits of a
128# parse tree.
129#
130# This is very complicated because we're applying a formal type system
131# to some very DWIM code. The types we end up with will be the types
132# of the arguments to the SoupStrainer constructor and (more
133# familiarly to Beautiful Soup users) the find* methods.
135#: A function that takes a PageElement and returns a yes-or-no answer.
136_PageElementMatchFunction: TypeAlias = Callable[["PageElement"], bool]
138#: A function that takes the raw parsed ingredients of a markup tag
139#: and returns a yes-or-no answer.
140# Not necessary at the moment.
141# _AllowTagCreationFunction:TypeAlias = Callable[[Optional[str], str, Optional[_RawAttributeValues]], bool]
143#: A function that takes the raw parsed ingredients of a markup string node
144#: and returns a yes-or-no answer.
145# Not necessary at the moment.
146# _AllowStringCreationFunction:TypeAlias = Callable[[Optional[str]], bool]
148#: A function that takes a `Tag` and returns a yes-or-no answer.
149#: A `TagNameMatchRule` expects this kind of function, if you're
150#: going to pass it a function.
151_TagMatchFunction: TypeAlias = Callable[["Tag"], bool]
153#: A function that takes a single string and returns a yes-or-no
154#: answer. An `AttributeValueMatchRule` expects this kind of function, if
155#: you're going to pass it a function. So does a `StringMatchRule`.
156_StringMatchFunction: TypeAlias = Callable[[str], bool]
158#: Either a tag name, an attribute value or a string can be matched
159#: against a string, bytestring, regular expression, or a boolean.
160_BaseStrainable: TypeAlias = Union[str, bytes, Pattern[str], bool]
162#: A tag can be matched either with the `_BaseStrainable` options, or
163#: using a function that takes the `Tag` as its sole argument.
164_BaseStrainableElement: TypeAlias = Union[_BaseStrainable, _TagMatchFunction]
166#: A tag's attribute vgalue can be matched either with the
167#: `_BaseStrainable` options, or using a function that takes that
168#: value as its sole argument.
169_BaseStrainableAttribute: TypeAlias = Union[_BaseStrainable, _StringMatchFunction]
171#: A tag can be matched using either a single criterion or a list of
172#: criteria.
173_StrainableElement: TypeAlias = Union[
174 _BaseStrainableElement, Iterable[_BaseStrainableElement]
175]
177#: An attribute value can be matched using either a single criterion
178#: or a list of criteria.
179_StrainableAttribute: TypeAlias = Union[
180 _BaseStrainableAttribute, Iterable[_BaseStrainableAttribute]
181]
183#: An string can be matched using the same techniques as
184#: an attribute value.
185_StrainableString: TypeAlias = _StrainableAttribute
187#: A dictionary may be used to match against multiple attribute vlaues at once.
188_StrainableAttributes: TypeAlias = Dict[str, _StrainableAttribute]
190#: Many Beautiful soup methods return a PageElement or an ResultSet of
191#: PageElements. A PageElement is either a Tag or a NavigableString.
192#: These convenience aliases make it easier for IDE users to see which methods
193#: are available on the objects they're dealing with.
194_OneElement: TypeAlias = Union["PageElement", "Tag", "NavigableString"]
195_AtMostOneElement: TypeAlias = Optional[_OneElement]
196_QueryResults: TypeAlias = "ResultSet[_OneElement]"