Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/bs4/element.py: 39%

1481 """An `HTML comment <https://dev.w3.org/html5/spec-LC/syntax.html#comments>`_ or `XML comment <https://www.w3.org/TR/REC-xml/#sec-comments>`_."""

1482

1483 PREFIX: str = "<!--"

1484 SUFFIX: str = "-->"

1485

1486

class Declaration(PreformattedString):
    """An `XML declaration <https://www.w3.org/TR/REC-xml/#sec-prolog-dtd>`_."""

    # Delimiters for this kind of string. How they are applied is decided
    # by PreformattedString (presumably wrapped around the text on output
    # -- confirm against that class).
    PREFIX: str = "<?"
    SUFFIX: str = "?>"

1492

1493

class Doctype(PreformattedString):
    """A `document type declaration <https://www.w3.org/TR/REC-xml/#dt-doctype>`_."""

    @classmethod
    def for_name_and_ids(
        cls, name: str, pub_id: Optional[str], system_id: Optional[str]
    ) -> Doctype:
        """Generate an appropriate document type declaration for a given
        public ID and system ID.

        The object is built with ``cls``, so calling this on a subclass
        yields an instance of that subclass.

        :param name: The name of the document's root element, e.g. 'html'.
        :param pub_id: The Formal Public Identifier for this document type,
            e.g. '-//W3C//DTD XHTML 1.1//EN'
        :param system_id: The system identifier for this document type,
            e.g. 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        :return: A new doctype object wrapping the generated string.
        """
        return cls(cls._string_for_name_and_ids(name, pub_id, system_id))

    @classmethod
    def _string_for_name_and_ids(
        cls, name: str, pub_id: Optional[str], system_id: Optional[str]
    ) -> str:
        """Generate a string to be used as the basis of a Doctype object.

        This is a separate method from for_name_and_ids() because the lxml
        TreeBuilder needs to call it.

        :param name: The root element name; a false value is treated as "".
        :param pub_id: Public identifier, or None to omit the PUBLIC clause.
        :param system_id: System identifier, or None to omit it.
        :return: The text of the declaration, without PREFIX and SUFFIX.
        """
        value = name or ""
        if pub_id is not None:
            value += ' PUBLIC "%s"' % pub_id
            # With a public ID present, the system ID follows it directly,
            # without its own SYSTEM keyword.
            if system_id is not None:
                value += ' "%s"' % system_id
        elif system_id is not None:
            value += ' SYSTEM "%s"' % system_id
        return value

    PREFIX: str = "<!DOCTYPE "
    SUFFIX: str = ">\n"

1532

1533

class Stylesheet(NavigableString):
    """A `NavigableString` representing the contents of a `<style> HTML
    tag <https://dev.w3.org/html5/spec-LC/Overview.html#the-style-element>`_
    (probably CSS).

    Used to distinguish embedded stylesheets from textual content.

    This subclass defines nothing of its own; only its type carries
    meaning.
    """

1541

1542

class Script(NavigableString):
    """A `NavigableString` representing the contents of a `<script>
    HTML tag
    <https://dev.w3.org/html5/spec-LC/Overview.html#the-script-element>`_
    (probably JavaScript).

    Used to distinguish executable code from textual content.

    This subclass defines nothing of its own; only its type carries
    meaning.
    """

1551

1552

class TemplateString(NavigableString):
    """A `NavigableString` representing a string found inside an `HTML
    <template> tag <https://html.spec.whatwg.org/multipage/scripting.html#the-template-element>`_
    embedded in a larger document.

    Used to distinguish such strings from the main body of the document.

    This subclass defines nothing of its own; only its type carries
    meaning.
    """

1560

1561

class RubyTextString(NavigableString):
    """A `NavigableString` representing the contents of an `<rt> HTML
    tag <https://dev.w3.org/html5/spec-LC/text-level-semantics.html#the-rt-element>`_.

    Can be used to distinguish such strings from the strings they're
    annotating.

    This subclass defines nothing of its own; only its type carries
    meaning.
    """

1569

1570

class RubyParenthesisString(NavigableString):
    """A `NavigableString` representing the contents of an `<rp> HTML
    tag <https://dev.w3.org/html5/spec-LC/text-level-semantics.html#the-rp-element>`_.

    This subclass defines nothing of its own; only its type carries
    meaning.
    """

1575

1576

1577class Tag(PageElement):

1578 """An HTML or XML tag that is part of a parse tree, along with its

1579 attributes, contents, and relationships to other parts of the tree.

1580

1581 When Beautiful Soup parses the markup ``<b>penguin</b>``, it will

1582 create a `Tag` object representing the ``<b>`` tag. You can

1583 instantiate `Tag` objects directly, but it's not necessary unless

1584 you're adding entirely new markup to a parsed document. Most of

1585 the constructor arguments are intended for use by the `TreeBuilder`

1586 that's parsing a document.

1587

1588 :param parser: A `BeautifulSoup` object representing the parse tree this

1589 `Tag` will be part of.

1590 :param builder: The `TreeBuilder` being used to build the tree.

1591 :param name: The name of the tag.

1592 :param namespace: The URI of this tag's XML namespace, if any.

1593 :param prefix: The prefix for this tag's XML namespace, if any.

1594 :param attrs: A dictionary of attribute values.

1595 :param parent: The `Tag` to use as the parent of this `Tag`. May be

1596 the `BeautifulSoup` object itself.

1597 :param previous: The `PageElement` that was parsed immediately before

1598 parsing this tag.

1599 :param is_xml: If True, this is an XML tag. Otherwise, this is an

1600 HTML tag.

1601 :param sourceline: The line number where this tag was found in its

1602 source document.

1603 :param sourcepos: The character position within ``sourceline`` where this

1604 tag was found.

1605 :param can_be_empty_element: If True, this tag should be

1606 represented as <tag/>. If False, this tag should be represented

1607 as <tag></tag>.

1608 :param cdata_list_attributes: A dictionary of attributes whose values should

1609 be parsed as lists of strings if they ever show up on this tag.

1610 :param preserve_whitespace_tags: Names of tags whose contents

1611 should have their whitespace preserved if they are encountered inside

1612 this tag.

1613 :param interesting_string_types: When iterating over this tag's

1614 string contents in methods like `Tag.strings` or

1615 `PageElement.get_text`, these are the types of strings that are

1616 interesting enough to be considered. By default,

1617 `NavigableString` (normal strings) and `CData` (CDATA

1618 sections) are the only interesting string subtypes.

1619 :param namespaces: A dictionary mapping currently active

1620 namespace prefixes to URIs, as of the point in the parsing process when

1621 this tag was encountered. This can be used later to

1622 construct CSS selectors.

1623

1624 """

1625

def __init__(
    self,
    parser: Optional[BeautifulSoup] = None,
    builder: Optional[TreeBuilder] = None,
    name: Optional[str] = None,
    namespace: Optional[str] = None,
    prefix: Optional[str] = None,
    attrs: Optional[_RawOrProcessedAttributeValues] = None,
    parent: Optional[Union[BeautifulSoup, Tag]] = None,
    previous: _AtMostOneElement = None,
    is_xml: Optional[bool] = None,
    sourceline: Optional[int] = None,
    sourcepos: Optional[int] = None,
    can_be_empty_element: Optional[bool] = None,
    cdata_list_attributes: Optional[Dict[str, Set[str]]] = None,
    preserve_whitespace_tags: Optional[Set[str]] = None,
    interesting_string_types: Optional[Set[Type[NavigableString]]] = None,
    namespaces: Optional[Dict[str, str]] = None,
    # NOTE: Any new arguments here need to be mirrored in
    # Tag.copy_self, and potentially BeautifulSoup.new_tag
    # as well.
):
    """Set up a new tag, taking per-tag policy from ``builder`` when one
    is given and from the keyword arguments otherwise.

    :raises ValueError: If ``name`` is None.
    """
    if parser is None:
        self.parser_class = None
    else:
        # We don't actually store the parser object: that lets extracted
        # chunks be garbage-collected.
        self.parser_class = parser.__class__
    if name is None:
        raise ValueError("No value provided for new tag's name.")
    self.name = name
    self.namespace = namespace
    self._namespaces = namespaces or {}
    self.prefix = prefix

    # The position is stored exactly as given. This used to sit behind a
    # conditional on builder.store_line_numbers whose two branches were
    # identical, so the condition never changed the outcome.
    self.sourceline = sourceline
    self.sourcepos = sourcepos

    # Pick the container classes for attributes and multi-valued
    # attribute values.
    attr_dict_class: type[AttributeDict]
    attribute_value_list_class: type[AttributeValueList]
    if builder is None:
        attr_dict_class = XMLAttributeDict if is_xml else HTMLAttributeDict
        attribute_value_list_class = AttributeValueList
    else:
        attr_dict_class = builder.attribute_dict_class
        attribute_value_list_class = builder.attribute_value_list_class
    self.attribute_value_list_class = attribute_value_list_class

    if attrs is None:
        self.attrs = attr_dict_class()
    elif builder is not None and builder.cdata_list_attributes:
        self.attrs = builder._replace_cdata_list_attribute_values(
            self.name, attrs
        )
    else:
        self.attrs = attr_dict_class()
        # Make sure that the values of any multi-valued
        # attributes (e.g. when a Tag is copied) are stored in
        # new lists.
        for k, v in attrs.items():
            if isinstance(v, list):
                v = v.__class__(v)
            self.attrs[k] = v

    # If possible, determine ahead of time whether this tag is an
    # XML tag.
    self.known_xml = builder.is_xml if builder else is_xml
    self.contents: List[PageElement] = []
    self.setup(parent, previous)
    self.hidden = False

    if builder is None:
        # In the absence of a TreeBuilder, use whatever values were
        # passed in here. They're probably None, unless this is a copy of some
        # other tag.
        self.can_be_empty_element = can_be_empty_element
        self.cdata_list_attributes = cdata_list_attributes
        self.preserve_whitespace_tags = preserve_whitespace_tags
        self.interesting_string_types = interesting_string_types
    else:
        # Set up any substitutions for this tag, such as the charset in a META tag.
        builder.set_up_substitutions(self)

        # Ask the TreeBuilder whether this tag might be an empty-element tag.
        self.can_be_empty_element = builder.can_be_empty_element(name)

        # Keep track of the list of attributes of this tag that
        # might need to be treated as a list.
        #
        # For performance reasons, we store the whole data structure
        # rather than asking the question of every tag. Asking would
        # require building a new data structure every time, and
        # (unlike can_be_empty_element), we almost never need
        # to check this.
        self.cdata_list_attributes = builder.cdata_list_attributes

        # Keep track of the names that might cause this tag to be treated as a
        # whitespace-preserved tag.
        self.preserve_whitespace_tags = builder.preserve_whitespace_tags

        if self.name in builder.string_containers:
            # This sort of tag uses a special string container
            # subclass for most of its strings. We need to be able
            # to look up the proper container subclass.
            self.interesting_string_types = {builder.string_containers[self.name]}
        else:
            self.interesting_string_types = self.MAIN_CONTENT_STRING_TYPES

1746

# Class of the parser object handed to the constructor, or None when no
# parser was given. The parser object itself is not stored.
parser_class: Optional[type[BeautifulSoup]]
# Tag name; the constructor rejects None.
name: str
# XML namespace URI and prefix, stored as passed to the constructor.
namespace: Optional[str]
prefix: Optional[str]
# Attribute dictionary built by the constructor.
attrs: _AttributeValues
# Source position, stored as passed to the constructor.
sourceline: Optional[int]
sourcepos: Optional[int]
# The builder's is_xml flag when a builder was given, otherwise the
# constructor's is_xml argument.
known_xml: Optional[bool]
# Direct children of this tag; starts out empty.
contents: List[PageElement]
# Initialized to False by the constructor.
hidden: bool
# String types considered by default when iterating over strings; None
# means "fall back to MAIN_CONTENT_STRING_TYPES".
interesting_string_types: Optional[Set[Type[NavigableString]]]

# The next three come from the TreeBuilder when one is given, and from
# the constructor arguments otherwise.
can_be_empty_element: Optional[bool]
cdata_list_attributes: Optional[Dict[str, Set[str]]]
preserve_whitespace_tags: Optional[Set[str]]

# NOTE(review): presumably a deprecated alias for ``parser_class``
# (the helper is given the version string "4.0.0") -- confirm against
# _deprecated_alias.
#: :meta private:
parserClass = _deprecated_alias("parserClass", "parser_class", "4.0.0")

1765

def __deepcopy__(self, memo: Dict[Any, Any], recursive: bool = True) -> Self:
    """Return a new Tag that is detached from the parse tree.

    :param memo: The memo dictionary, forwarded to each descendant's own
        ``__deepcopy__``.
    :param recursive: If False, only this tag is copied (via
        `Tag.copy_self`) and the copy has no contents.
    :return: The copy.
    """
    clone = self.copy_self()
    if not recursive:
        return clone

    # Walk the descendants as a flat stream of events and keep an explicit
    # stack of the copies that are still "open", so that no recursive
    # function calls are needed.
    open_copies: List[Tag] = [clone]
    for event, element in self._event_stream(self.descendants):
        if event is Tag.END_ELEMENT_EVENT:
            # That tag is finished; later elements belong to its parent.
            open_copies.pop()
            continue

        copied = element.__deepcopy__(memo, recursive=False)
        # The copy becomes a child of the innermost open tag.
        open_copies[-1].append(copied)
        if event is Tag.START_ELEMENT_EVENT:
            # Elements that follow are children of this new copy.
            open_copies.append(cast(Tag, copied))
    return clone

1791

def copy_self(self) -> Self:
    """Build an empty, unattached tag with the same settings as this one.

    The copy is created through ``type(self)`` with no parser and no
    builder, so every per-tag setting is passed along explicitly. It has
    no contents. This is the first step of a deep copy, but it can also
    be called directly.

    :return: The new tag.
    """
    tag_class = type(self)
    duplicate = tag_class(
        None,
        None,
        self.name,
        self.namespace,
        self.prefix,
        self.attrs,
        is_xml=self._is_xml,
        sourceline=self.sourceline,
        sourcepos=self.sourcepos,
        can_be_empty_element=self.can_be_empty_element,
        cdata_list_attributes=self.cdata_list_attributes,
        preserve_whitespace_tags=self.preserve_whitespace_tags,
        interesting_string_types=self.interesting_string_types,
        namespaces=self._namespaces,
    )
    # Carry over state the constructor call above may not have set.
    duplicate.can_be_empty_element = self.can_be_empty_element
    duplicate.hidden = self.hidden
    return duplicate

1819

@property
def is_empty_element(self) -> bool:
    """Whether this tag is an empty-element (self-closing) tag.

    A tag with any contents is never an empty-element tag. A tag without
    contents is one only if its ``can_be_empty_element`` flag is exactly
    ``True``. The constructor takes that flag from the `TreeBuilder` when
    one is given, or from its own argument otherwise.
    """
    if len(self.contents) != 0:
        return False
    return self.can_be_empty_element is True

1838

# Old camelCase spelling, kept for backwards compatibility. It returns the
# value of the ``is_empty_element`` property.
@_deprecated("is_empty_element", "4.0.0")
def isSelfClosing(self) -> bool:
    ": :meta private:"
    return self.is_empty_element

1843

@property
def string(self) -> Optional[str]:
    """The single string inside this `Tag`, if there is exactly one.

    :return: If this `Tag` has exactly one child and it is a
        `NavigableString`, that string. If the only child is a `Tag`,
        that child's own `Tag.string` (so the lookup continues
        downwards). ``None`` if there are no children or more than one.

    If this unexpectedly gives you ``None``, your `Tag` probably has
    more than one thing inside it.
    """
    if len(self.contents) != 1:
        return None
    only_child = self.contents[0]
    if isinstance(only_child, NavigableString):
        return only_child
    if isinstance(only_child, Tag):
        return only_child.string
    return None

@string.setter
def string(self, string: str) -> None:
    """Replace the `Tag.contents` of this `Tag` with a single string.

    A value that is already a `NavigableString` is re-created with its
    own class; any other value is wrapped in a plain `NavigableString`.
    """
    self.clear()
    container = (
        string.__class__ if isinstance(string, NavigableString) else NavigableString
    )
    self.append(container(string))

1877

# The string types treated as ordinary document text when no narrower
# set of interesting string types applies.
#: :meta private:
MAIN_CONTENT_STRING_TYPES = {NavigableString, CData}

1880

def _all_strings(
    self, strip: bool = False, types: _OneOrMoreStringTypes = PageElement.default
) -> Iterator[str]:
    """Yield the strings beneath this tag that have an accepted type.

    :param strip: If True, each string is stripped before being yielded,
        and strings that are empty after stripping are skipped.

    :param types: Either a single NavigableString subclass or a
        collection of them. A string is yielded only if its exact type
        is accepted. When left at the default, the tag's
        ``interesting_string_types`` is used, or
        ``MAIN_CONTENT_STRING_TYPES`` if that is None. If ``types`` ends
        up as None, strings of every type are accepted.
    """
    if types is self.default:
        types = self.interesting_string_types
        if types is None:
            types = self.MAIN_CONTENT_STRING_TYPES

    # Whether we compare against one class or look up in a collection
    # does not change while iterating.
    single_type = isinstance(types, type)

    for node in self.descendants:
        if not isinstance(node, NavigableString):
            continue
        node_type = type(node)
        if single_type:
            wanted = node_type is types
        else:
            wanted = types is None or node_type in types
        if not wanted:
            # We're not interested in strings of this type.
            continue
        if not strip:
            yield node
            continue
        text = node.strip()
        if len(text) != 0:
            yield text

strings = property(_all_strings)

1923

def insert(self, position: int, *new_children: _InsertableElement) -> List[PageElement]:
    """Insert one or more new PageElements as a child of this `Tag`.

    This works similarly to :py:meth:`list.insert`, except you can insert
    multiple elements at once.

    :param position: The numeric position that should be occupied
       in this Tag's `Tag.children` by the first new `PageElement`.

    :param new_children: The PageElements to insert.

    :return: The newly inserted PageElements.
    """
    inserted: List[PageElement] = []
    for new_child in new_children:
        newly_inserted = self._insert(position, new_child)
        inserted.extend(newly_inserted)
        # One argument may add several elements (a BeautifulSoup object
        # contributes all of its children) or none at all (an empty
        # one). Advance by what was actually added so the next argument
        # lands right after it.
        position += len(newly_inserted)
    return inserted

1942

def _insert(self, position: int, new_child: _InsertableElement) -> List[PageElement]:
    """Insert a single element into this tag's contents and rewire the
    sibling and document-order links around it.

    :param position: Desired index in ``self.contents``; values past the
        end are clamped to the end.
    :param new_child: The element to insert. A plain ``str`` is wrapped
        in a `NavigableString`. A `BeautifulSoup` object is not inserted
        itself; its children are inserted instead.
    :return: A list holding the inserted element, or, for a
        `BeautifulSoup` object, whatever `Tag.insert` returns for its
        children.
    :raises ValueError: If ``new_child`` is None or is this tag itself.
    """
    if new_child is None:
        raise ValueError("Cannot insert None into a tag.")
    if new_child is self:
        raise ValueError("Cannot insert a tag into itself.")
    if isinstance(new_child, str) and not isinstance(new_child, NavigableString):
        new_child = NavigableString(new_child)

    from bs4 import BeautifulSoup
    if isinstance(new_child, BeautifulSoup):
        # We don't want to end up with a situation where one BeautifulSoup
        # object contains another. Insert the BeautifulSoup's children and
        # return them.
        return self.insert(position, *list(new_child.contents))
    position = min(position, len(self.contents))
    if hasattr(new_child, "parent") and new_child.parent is not None:
        # We're 'inserting' an element that's already one
        # of this object's children.
        if new_child.parent is self:
            current_index = self.index(new_child)
            if current_index < position:
                # We're moving this element further down the list
                # of this object's children. That means that when
                # we extract this element, our target index will
                # jump down one.
                position -= 1
            elif current_index == position:
                # We're 'inserting' an element into its current location.
                # This is a no-op.
                return [new_child]
        # Detach the element from wherever it currently lives.
        new_child.extract()

    new_child.parent = self
    previous_child = None
    if position == 0:
        # First child: no previous sibling, and the element that comes
        # just before it in document order is this tag.
        new_child.previous_sibling = None
        new_child.previous_element = self
    else:
        previous_child = self.contents[position - 1]
        new_child.previous_sibling = previous_child
        new_child.previous_sibling.next_sibling = new_child
        new_child.previous_element = previous_child._last_descendant(False)
    if new_child.previous_element is not None:
        new_child.previous_element.next_element = new_child

    new_childs_last_element = new_child._last_descendant(
        is_initialized=False, accept_self=True
    )
    # new_childs_last_element can't be None because we passed
    # accept_self=True into _last_descendant. Worst case,
    # new_childs_last_element will be new_child itself. Making
    # this cast removes several mypy complaints later on as we
    # manipulate new_childs_last_element.
    new_childs_last_element = cast(PageElement, new_childs_last_element)

    if position >= len(self.contents):
        # Appending at the end: there is no next sibling, so the next
        # element in document order is the next sibling of the nearest
        # ancestor (starting with this tag) that has one.
        new_child.next_sibling = None

        parent: Optional[Tag] = self
        parents_next_sibling = None
        while parents_next_sibling is None and parent is not None:
            parents_next_sibling = parent.next_sibling
            parent = parent.parent
            if parents_next_sibling is not None:
                # We found the element that comes next in the document.
                break
        if parents_next_sibling is not None:
            new_childs_last_element.next_element = parents_next_sibling
        else:
            # The last element of this tag is the last element in
            # the document.
            new_childs_last_element.next_element = None
    else:
        next_child = self.contents[position]
        new_child.next_sibling = next_child
        if new_child.next_sibling is not None:
            new_child.next_sibling.previous_sibling = new_child
        new_childs_last_element.next_element = next_child

    if new_childs_last_element.next_element is not None:
        new_childs_last_element.next_element.previous_element = (
            new_childs_last_element
        )
    self.contents.insert(position, new_child)

    return [new_child]

2029

def unwrap(self) -> Self:
    """Replace this `PageElement` with its contents.

    The element is extracted from its parent, and its children are then
    inserted into the parent at the index the element used to occupy.

    :return: This object, no longer part of the tree.
    :raises ValueError: If this element has no parent.
    """
    container = self.parent
    if container is None:
        raise ValueError(
            "Cannot replace an element with its contents when that "
            "element is not part of a tree."
        )
    slot = container.index(self)
    self.extract(_self_index=slot)
    # Insert back to front at a fixed index, so the children end up in
    # their original order. The reversed slice is a copy, so it is
    # unaffected if moving a child changes self.contents.
    for child in self.contents[::-1]:
        container.insert(slot, child)
    return self

replace_with_children = unwrap

2048

# Old camelCase spelling, kept for backwards compatibility. It simply
# calls unwrap().
@_deprecated("unwrap", "4.0.0")
def replaceWithChildren(self) -> _OneElement:
    ": :meta private:"
    return self.unwrap()

2053

def append(self, tag: _InsertableElement) -> PageElement:
    """Add the given `PageElement` after this `Tag`'s current contents.

    :param tag: A PageElement.

    :return: The newly appended PageElement.
    """
    end = len(self.contents)
    appended = self.insert(end, tag)
    return appended[0]

2063

def extend(self, tags: Union[Iterable[_InsertableElement], Tag]) -> List[PageElement]:
    """Appends one or more objects to the contents of this
    `Tag`.

    :param tags: If a list of `PageElement` objects is provided,
        they will be appended to this tag's contents, one at a time.
        If a single `Tag` is provided, its `Tag.contents` will be
        used to extend this object's `Tag.contents`.

    :return: The list of PageElements that were appended.
    :raises TypeError: If ``tags`` is neither a `Tag`, a single
        element or string, nor an iterable.
    """
    tag_list: List[_InsertableElement]

    if isinstance(tags, Tag):
        # Copy the list: appending moves each child out of ``tags``.
        tag_list = list(tags.contents)
    elif isinstance(tags, (PageElement, str)):
        # The caller should really be using append() instead,
        # but we can make it work.
        warnings.warn(
            "A single non-Tag item was passed into Tag.extend. Use Tag.append instead.",
            UserWarning,
            stacklevel=2,
        )
        if isinstance(tags, str) and not isinstance(tags, PageElement):
            tags = NavigableString(tags)
        tag_list = [tags]
    elif isinstance(tags, Iterable):
        # Moving items around the tree may change their position in
        # the original list. Make a list that won't change.
        tag_list = list(tags)
    else:
        # Without this branch a non-iterable argument would surface as
        # an UnboundLocalError on ``tag_list`` further down.
        raise TypeError(
            "Tag.extend expects a Tag or an iterable of elements, not %s."
            % type(tags).__name__
        )

    return [self.append(tag) for tag in tag_list]

2100

def clear(self, decompose: bool = False) -> None:
    """Remove every child of this `Tag`.

    :param decompose: If False (the default), each child is removed with
        `PageElement.extract`. If True, `PageElement.decompose` (a more
        destructive method) is called on each child instead.
    """
    # Work on a snapshot: removing children changes self.contents.
    for child in list(self.contents):
        if not decompose:
            child.extract()
        else:
            child.decompose()

2114

def smooth(self) -> None:
    """Merge runs of adjacent strings among this `Tag`'s children, and do
    the same inside every child `Tag`.

    If you perform a lot of operations that modify the tree, calling
    this afterwards can make pretty-printed output look more natural.

    Two neighbours are merged only if both are `NavigableString` objects
    and neither is a `PreformattedString`. The merged result is a plain
    `NavigableString`.
    """

    def is_plain_string(node: Any) -> bool:
        return isinstance(node, NavigableString) and not isinstance(
            node, PreformattedString
        )

    # Record the index of the first member of each pair to merge, rather
    # than copying self.contents: usually very few strings are affected.
    merge_points = []
    for position, current in enumerate(self.contents):
        if isinstance(current, Tag):
            # Recursively smooth children.
            current.smooth()
        if position == len(self.contents) - 1:
            # The last child has no successor to merge with.
            continue
        following = self.contents[position + 1]
        if is_plain_string(current) and is_plain_string(following):
            merge_points.append(position)

    # Merge from the end backwards, so that removing items from
    # .contents doesn't shift the positions still to be processed.
    for position in reversed(merge_points):
        left = cast(NavigableString, self.contents[position])
        right = cast(NavigableString, self.contents[position + 1])
        right.extract()
        left.replace_with(NavigableString(left + right))

2154

def index(self, element: PageElement) -> int:
    """Return the position of a child within this `Tag`'s contents.

    Children are compared by identity, not by value, so two children
    that are equal as strings are still told apart.

    :param element: Look for this `PageElement` in this object's contents.
    :raises ValueError: If ``element`` is not one of the children.
    """
    positions = (
        position
        for position, child in enumerate(self.contents)
        if child is element
    )
    found = next(positions, None)
    if found is None:
        raise ValueError("Tag.index: element not in tag")
    return found

2167

def get(
    self, key: str, default: Optional[_AttributeValue] = None
) -> Optional[_AttributeValue]:
    """Look up an attribute of this `Tag` without raising if it is absent.

    :param key: The attribute to look for.
    :param default: Returned when this `Tag` has no such attribute.
    :return: The attribute's value, or ``default``.
    """
    attributes = self.attrs
    return attributes.get(key, default)

2180

def get_attribute_list(
    self, key: str, default: Optional[AttributeValueList] = None
) -> AttributeValueList:
    """Like get(), but the result is always a list.

    :param key: The attribute to look for.
    :param default: Use this value if the attribute is not present
        on this `Tag`.
    :return: An empty list if the lookup yields None; the value itself
        if it is already a list; otherwise a one-item list holding the
        value. New lists are created with this tag's
        ``attribute_value_list_class``.
    """
    found = self.get(key, default)
    if found is None:
        return self.attribute_value_list_class()
    if isinstance(found, list):
        return found
    # cast() only informs the type checker; the value is not converted.
    return self.attribute_value_list_class([cast(str, found)])

2203

def has_attr(self, key: str) -> bool:
    """Report whether this `Tag` has an attribute named ``key``.

    :param key: The attribute name to look for in ``self.attrs``.
    """
    present = key in self.attrs
    return present

2207

2208 def __hash__(self) -> int:

2209 return str(self).__hash__()

2210

def __getitem__(self, key: str) -> _AttributeValue:
    """tag[key] returns the value of the 'key' attribute for the Tag,
    and throws an exception if it's not there.

    The lookup is a plain subscript on ``self.attrs``, so the exception
    is whatever that mapping raises for a missing key.
    """
    return self.attrs[key]

2215

def __iter__(self) -> Iterator[PageElement]:
    """Iterating over a Tag iterates over its contents.

    Only the direct children in ``self.contents`` are visited.
    """
    return iter(self.contents)

2219

def __len__(self) -> int:
    """The length of a Tag is the length of its list of contents.

    This counts direct children only.
    """
    return len(self.contents)

2223

def __contains__(self, x: Any) -> bool:
    """``x in tag`` is True if ``x`` is found in the tag's list of
    contents (direct children only), using ordinary list membership.
    """
    return x in self.contents

2226

def __bool__(self) -> bool:
    """A tag is non-None even if it has no contents.

    Without this method, truth testing would fall back on ``__len__``,
    and a tag with no children would be falsy.
    """
    return True

2230

def __setitem__(self, key: str, value: _AttributeValue) -> None:
    """Setting tag[key] sets the value of the 'key' attribute for the
    tag.

    The value is stored in ``self.attrs`` as given, replacing any
    existing value for that key.
    """
    self.attrs[key] = value

2235

def __delitem__(self, key: str) -> None:
    """Deleting tag[key] deletes all 'key' attributes for the tag.

    Nothing happens, and nothing is raised, if the tag has no such
    attribute.
    """
    # pop() with a default never raises for a missing key.
    self.attrs.pop(key, None)

2239

def __call__(
    self,
    name: Optional[_StrainableElement] = None,
    attrs: _StrainableAttributes = {},
    recursive: bool = True,
    string: Optional[_StrainableString] = None,
    limit: Optional[int] = None,
    _stacklevel: int = 2,
    **kwargs: _StrainableAttribute,
) -> _QueryResults:
    """Calling a Tag like a function is the same as calling its
    find_all() method. Eg. tag('a') returns a list of all the A tags
    found within this tag.

    Every argument is forwarded to find_all() unchanged and in the same
    order; see that method for what each one means.
    """
    # NOTE(review): ``attrs`` has a mutable default. This method only
    # passes it along; whether find_all() ever mutates it cannot be seen
    # from here -- confirm before relying on it.
    return self.find_all(
        name, attrs, recursive, string, limit, _stacklevel, **kwargs
    )

2256

def __getattr__(self, subtag: str) -> Optional[Tag]:
    """Treat an unknown attribute as a search: ``tag.subtag`` is the same
    as ``tag.find("subtag")``.

    A name longer than three characters that ends in ``Tag`` is handled
    as the old ``.fooTag`` spelling: a `DeprecationWarning` is issued and
    the search uses the name without the suffix.

    :raises AttributeError: For names starting with a double underscore,
        and for ``contents``.
    """
    found: _AtMostOneElement
    if len(subtag) > 3 and subtag.endswith("Tag"):
        # BS3: soup.aTag -> soup.find("a")
        legacy_name = subtag[:-3]
        warnings.warn(
            '.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")'
            % {"name": legacy_name},
            DeprecationWarning,
            stacklevel=2,
        )
        found = self.find(legacy_name)
    elif subtag.startswith("__") or subtag == "contents":
        # We special case contents to avoid recursion.
        raise AttributeError(
            "'%s' object has no attribute '%s'" % (self.__class__, subtag)
        )
    else:
        found = self.find(subtag)
    return cast(Optional[Tag], found)

2279

def __eq__(self, other: Any) -> bool:
    """Two Tags are equal iff they have the same name, the same
    attributes, and the same contents (compared pairwise, which makes
    the comparison recursive).

    A Tag always equals itself and never equals a non-Tag.
    """
    if self is other:
        return True
    if not isinstance(other, Tag):
        return False
    for required in ("name", "attrs", "contents"):
        if not hasattr(other, required):
            return False
    if self.name != other.name:
        return False
    if self.attrs != other.attrs:
        return False
    if len(self) != len(other):
        return False
    # Same number of children on both sides, so pair them up.
    for mine, theirs in zip(self.contents, other.contents):
        if mine != theirs:
            return False
    return True

2300

def __ne__(self, other: Any) -> bool:
    """Returns true iff this Tag is not identical to `other`,
    as defined in __eq__.

    This is the exact negation of ``self == other``.
    """
    return not self == other

2305

def __repr__(self) -> str:
    """Renders this `Tag` as a string.

    The result is whatever ``decode()`` returns when called with no
    arguments.
    """
    return self.decode()

# str(tag) and the legacy __unicode__ hook give the same rendering as
# repr(tag).
__str__ = __unicode__ = __repr__

2311

def encode(
    self,
    encoding: _Encoding = DEFAULT_OUTPUT_ENCODING,
    indent_level: Optional[int] = None,
    formatter: _FormatterOrName = "minimal",
    errors: str = "xmlcharrefreplace",
) -> bytes:
    """Render this `Tag` and its contents as a bytestring.

    The tag is first rendered to text with ``decode()``, and that text
    is then encoded.

    :param encoding: The encoding to use when converting to
        a bytestring. It is also passed to ``decode()``, so it may
        affect the text of the document, specifically any encoding
        declarations within the document.
    :param indent_level: Each line of the rendering will be
        indented this many levels. (The ``formatter`` decides what a
        'level' means, in terms of spaces or other characters
        output.) This is used internally in recursive calls while
        pretty-printing.
    :param formatter: Either a `Formatter` object, or a string naming one of
        the standard formatters.
    :param errors: An error handling strategy such as
        'xmlcharrefreplace'. This value is passed along into
        :py:meth:`str.encode` and its value should be one of the `error
        handling constants defined by Python's codecs module
        <https://docs.python.org/3/library/codecs.html#error-handlers>`_.
    :return: The encoded rendering.
    """
    rendered = self.decode(indent_level, encoding, formatter)
    return rendered.encode(encoding, errors)

2341

def decode(
    self,
    indent_level: Optional[int] = None,
    eventual_encoding: _Encoding = DEFAULT_OUTPUT_ENCODING,
    formatter: _FormatterOrName = "minimal",
    iterator: Optional[Iterator[PageElement]] = None,
) -> str:
    """Render this `Tag` and its contents as a Unicode string.

    :param indent_level: Each line of the rendering will be indented
        this many levels. (The ``formatter`` decides what a 'level'
        means, in terms of spaces or other characters output.) When
        this is None no indentation whitespace is added at all.
    :param eventual_encoding: The encoding you intend to use when
        converting the string to a bytestring. decode() is *not*
        responsible for performing that encoding. This information
        is needed so that a real encoding can be substituted in if
        the document contains an encoding declaration (e.g. in a
        <meta> tag).
    :param formatter: Either a `Formatter` object, or a string
        naming one of the standard formatters.
    :param iterator: The iterator to use when navigating over the
        parse tree. This is only used by `Tag.decode_contents` and
        you probably won't need to use it.
    :return: The concatenation of every rendered piece of markup.
    """
    # Resolve a formatter name into a Formatter object once, up
    # front, so the lookup is not repeated for every element.
    if not isinstance(formatter, Formatter):
        formatter = self.formatter_for_name(formatter)

    # A literal True is normalised to indentation level 0.
    if indent_level is True:
        indent_level = 0

    chunks = []

    # The tag that switched on "string literal mode", if any. While
    # it is open, its children are emitted verbatim instead of being
    # pretty-printed. The mode begins right after that tag opens and
    # ends right before it closes, so whitespace can still surround
    # the tag itself.
    literal_owner = None

    for event, element in self._event_stream(iterator):
        # Step 1: render the raw markup for this event.
        if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
            element = cast(Tag, element)
            chunk = element._format_tag(eventual_encoding, formatter, opening=True)
        elif event is Tag.END_ELEMENT_EVENT:
            element = cast(Tag, element)
            chunk = element._format_tag(eventual_encoding, formatter, opening=False)
            if indent_level is not None:
                indent_level -= 1
        else:
            element = cast(NavigableString, element)
            chunk = element.output_ready(formatter)

        # Step 2: decide where whitespace may go. Tags such as <pre>
        # and <script> can't be prettified, because extra whitespace
        # would change the meaning of their content.
        #
        # By default whitespace goes on both sides outside string
        # literal mode, and on neither side inside it.
        pad_before = pad_after = not literal_owner

        # The exceptions are the tags that move us into or out of
        # string literal mode.
        if (
            event is Tag.START_ELEMENT_EVENT
            and not literal_owner
            and not cast(Tag, element)._should_pretty_print()
        ):
            # Entering string literal mode: whitespace before this
            # tag but not after it. The mode lasts until this very
            # tag is closed.
            pad_before, pad_after = True, False
            literal_owner = element
        elif event is Tag.END_ELEMENT_EVENT and element is literal_owner:
            # Leaving string literal mode: whitespace after this
            # tag but not before it.
            pad_before, pad_after = False, True
            literal_owner = None

        # Step 3: apply the whitespace, but only when pretty-printing.
        if indent_level is not None:
            if pad_before or pad_after:
                if isinstance(element, NavigableString):
                    chunk = chunk.strip()
                if chunk:
                    chunk = self._indent_string(
                        chunk, indent_level, formatter, pad_before, pad_after
                    )
            if event == Tag.START_ELEMENT_EVENT:
                indent_level += 1
        chunks.append(chunk)
    return "".join(chunks)

2450

class _TreeTraversalEvent:
    """Marker type for the events produced while walking a parse tree.

    Instances carry no data; each one is only used as a distinct
    sentinel value.

    :meta private:
    """

# One sentinel per kind of event yielded by _event_stream.
START_ELEMENT_EVENT = _TreeTraversalEvent()  #: :meta private:
END_ELEMENT_EVENT = _TreeTraversalEvent()  #: :meta private:
EMPTY_ELEMENT_EVENT = _TreeTraversalEvent()  #: :meta private:
STRING_ELEMENT_EVENT = _TreeTraversalEvent()  #: :meta private:

2463

def _event_stream(
    self, iterator: Optional[Iterator[PageElement]] = None
) -> Iterator[Tuple[_TreeTraversalEvent, PageElement]]:
    """Yield a sequence of events that can be used to reconstruct the DOM
    for this element.

    This lets us recreate the nested structure of this element
    (e.g. when formatting it as a string) without using recursive
    method calls.

    This is similar in concept to the SAX API, but it's a simpler
    interface designed for internal use. The events are different
    from SAX and the arguments associated with the events are Tags
    and other Beautiful Soup objects.

    :param iterator: An alternate iterator to use when traversing
        the tree. When omitted (or falsy), ``self.self_and_descendants``
        is used.
    :return: An iterator of ``(event, element)`` pairs, where ``event``
        is one of the ``*_ELEMENT_EVENT`` sentinels defined on `Tag`.
    """
    # Tags that have been opened but not yet closed, innermost last.
    open_tags: List[Tag] = []

    iterator = iterator or self.self_and_descendants

    for c in iterator:
        # If the parent of the element we're about to yield is not
        # the tag currently on top of the stack, that tag closed
        # before this element appeared.
        #
        # This is deliberately an identity check: `!=` on tags is a
        # recursive structural comparison, which is needlessly
        # expensive here and cannot tell apart two distinct tags
        # that merely look alike.
        while open_tags and c.parent is not open_tags[-1]:
            yield Tag.END_ELEMENT_EVENT, open_tags.pop()

        if isinstance(c, Tag):
            if c.is_empty_element:
                # An empty element gets a single event and is never
                # pushed, so no END event is emitted for it.
                yield Tag.EMPTY_ELEMENT_EVENT, c
            else:
                yield Tag.START_ELEMENT_EVENT, c
                open_tags.append(c)
        else:
            yield Tag.STRING_ELEMENT_EVENT, c

    # Close whatever is still open once the iterator is exhausted.
    while open_tags:
        yield Tag.END_ELEMENT_EVENT, open_tags.pop()

2507

2508 def _indent_string(

2509 self,

2510 s: str,

2511 indent_level: int,

2512 formatter: Formatter,

2513 indent_before: bool,

2514 indent_after: bool,

2515 ) -> str:

2516 """Add indentation whitespace before and/or after a string.

2517

2518 :param s: The string to amend with whitespace.

2519 :param indent_level: The indentation level; affects how much

2520 whitespace goes before the string.

2521 :param indent_before: Whether or not to add whitespace

2522 before the string.

2523 :param indent_after: Whether or not to add whitespace

2524 (a newline) after the string.

2525 """

2526 space_before = ""

2527 if indent_before and indent_level:

2528 space_before = formatter.indent * indent_level

2529

2530 space_after = ""

2531 if indent_after:

2532 space_after = "\n"

2533

2534 return space_before + s + space_after

2535

2536 def _format_tag(

2537 self, eventual_encoding: str, formatter: Formatter, opening: bool

2538 ) -> str:

2539 if self.hidden:

2540 # A hidden tag is invisible, although its contents

2541 # are visible.

2542 return ""

2543

2544 # A tag starts with the < character (see below).

2545

2546 # Then the / character, if this is a closing tag.

2547 closing_slash = ""

2548 if not opening:

2549 closing_slash = "/"

2550

2551 # Then an optional namespace prefix.

2552 prefix = ""

2553 if self.prefix:

2554 prefix = self.prefix + ":"

2555

2556 # Then a list of attribute values, if this is an opening tag.

2557 attribute_string = ""

2558 if opening:

2559 attributes = formatter.attributes(self)

2560 attrs = []

2561 for key, val in attributes:

2562 if val is None:

2563 decoded = key

2564 else:

2565 if isinstance(val, list) or isinstance(val, tuple):

2566 val = " ".join(val)

2567 elif not isinstance(val, str):

2568 val = str(val)

2569 elif (

2570 isinstance(val, AttributeValueWithCharsetSubstitution)

2571 and eventual_encoding is not None

2572 ):

2573 val = val.substitute_encoding(eventual_encoding)

2574

2575 text = formatter.attribute_value(val)

2576 decoded = str(key) + "=" + formatter.quoted_attribute_value(text)

2577 attrs.append(decoded)

2578 if attrs:

2579 attribute_string = " " + " ".join(attrs)

2580

2581 # Then an optional closing slash (for a void element in an

2582 # XML document).

2583 void_element_closing_slash = ""

2584 if self.is_empty_element:

2585 void_element_closing_slash = formatter.void_element_close_prefix or ""

2586

2587 # Put it all together.

2588 return (

2589 "<"

2590 + closing_slash

2591 + prefix

2592 + self.name

2593 + attribute_string

2594 + void_element_closing_slash

2595 + ">"

2596 )

2597

2598 def _should_pretty_print(self, indent_level: int = 1) -> bool:

2599 """Should this tag be pretty-printed?

2600

2601 Most of them should, but some (such as <pre> in HTML

2602 documents) should not.

2603 """

2604 return indent_level is not None and (

2605 not self.preserve_whitespace_tags

2606 or self.name not in self.preserve_whitespace_tags

2607 )

2608

def prettify(
    self,
    encoding: Optional[_Encoding] = None,
    formatter: _FormatterOrName = "minimal",
) -> Union[str, bytes]:
    """Pretty-print this `Tag` as a string or bytestring.

    Rendering always starts at indentation level 0.

    :param encoding: The encoding of the bytestring, or None if you
        want Unicode.
    :param formatter: A Formatter object, or a string naming one of
        the standard formatters.
    :return: A string (if no ``encoding`` is provided) or a bytestring
        (otherwise).
    """
    if encoding is not None:
        return self.encode(encoding=encoding, indent_level=0, formatter=formatter)
    return self.decode(indent_level=0, formatter=formatter)

2626

def decode_contents(
    self,
    indent_level: Optional[int] = None,
    eventual_encoding: _Encoding = DEFAULT_OUTPUT_ENCODING,
    formatter: _FormatterOrName = "minimal",
) -> str:
    """Render the contents of this tag as a Unicode string.

    This delegates to `decode()`, but traverses ``self.descendants``
    rather than the default iterator.

    :param indent_level: Each line of the rendering will be
        indented this many levels. (The formatter decides what a
        'level' means in terms of spaces or other characters
        output.) Used internally in recursive calls while
        pretty-printing.

    :param eventual_encoding: The tag is destined to be
        encoded into this encoding. decode_contents() is *not*
        responsible for performing that encoding. This information
        is needed so that a real encoding can be substituted in if
        the document contains an encoding declaration (e.g. in a
        <meta> tag).

    :param formatter: A `Formatter` object, or a string naming one of
        the standard Formatters.
    :return: The rendered markup.
    """
    inner_elements = self.descendants
    return self.decode(
        indent_level, eventual_encoding, formatter, iterator=inner_elements
    )

2654

def encode_contents(
    self,
    indent_level: Optional[int] = None,
    encoding: _Encoding = DEFAULT_OUTPUT_ENCODING,
    formatter: _FormatterOrName = "minimal",
) -> bytes:
    """Render the contents of this element as a bytestring.

    :param indent_level: Each line of the rendering will be
        indented this many levels. (The ``formatter`` decides what a
        'level' means, in terms of spaces or other characters
        output.) This is used internally in recursive calls while
        pretty-printing.
    :param encoding: The bytestring will be in this encoding.
    :param formatter: Either a `Formatter` object, or a string naming
        one of the standard formatters.
    :return: The output of `decode_contents()`, encoded with
        ``encoding``.
    """
    return self.decode_contents(indent_level, encoding, formatter).encode(encoding)

2674

@_deprecated("encode_contents", "4.0.0")
def renderContents(
    self,
    encoding: _Encoding = DEFAULT_OUTPUT_ENCODING,
    prettyPrint: bool = False,
    indentLevel: Optional[int] = 0,
) -> bytes:
    """Deprecated method for BS3 compatibility.

    Forwards to `encode_contents()`; ``indentLevel`` is only honoured
    when ``prettyPrint`` is true.

    :meta private:
    """
    effective_indent = indentLevel if prettyPrint else None
    return self.encode_contents(indent_level=effective_indent, encoding=encoding)

2689

2690 # Soup methods

2691

def find(
    self,
    name: _FindMethodName = None,
    attrs: _StrainableAttributes = {},
    recursive: bool = True,
    string: Optional[_StrainableString] = None,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneElement:
    """Look in the children of this PageElement and find the first
    PageElement that matches the given criteria.

    All find_* methods take a common set of arguments. See the online
    documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param recursive: If this is True, find() will perform a
        recursive search of this Tag's children. Otherwise,
        only the direct children will be considered.
    :param string: A filter on the `Tag.string` attribute.
    :kwargs: Additional filters on attribute values.
    :return: The first match, or None if nothing matched.
    """
    # find() is find_all() with a limit of 1. The explicit
    # _stacklevel accounts for this extra call frame.
    matches = self.find_all(name, attrs, recursive, string, 1, _stacklevel=3, **kwargs)
    return matches[0] if matches else None

2720

# Backwards-compatible alias: findChild is registered as an alternate
# name for find() through _deprecated_function_alias.
# NOTE(review): the last argument looks like the version in which the
# old name was deprecated -- confirm against the helper.
findChild = _deprecated_function_alias("findChild", "find", "3.0.0")

2722

def find_all(
    self,
    name: _FindMethodName = None,
    attrs: _StrainableAttributes = {},
    recursive: bool = True,
    string: Optional[_StrainableString] = None,
    limit: Optional[int] = None,
    _stacklevel: int = 2,
    **kwargs: _StrainableAttribute,
) -> _QueryResults:
    """Look in the children of this `PageElement` and find all
    `PageElement` objects that match the given criteria.

    All find_* methods take a common set of arguments. See the online
    documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param recursive: If this is True, find_all() will perform a
        recursive search of this PageElement's children. Otherwise,
        only the direct children will be considered.
    :param string: A filter on the `Tag.string` attribute.
    :param limit: Stop looking after finding this many results.
    :param _stacklevel: Used internally to improve warning messages.
    :kwargs: Additional filters on attribute values.
    :return: Whatever ``self._find_all()`` returns for the chosen
        candidates.
    """
    # Search every descendant, or only the direct children.
    candidates = self.descendants if recursive else self.children
    return self._find_all(
        name, attrs, string, limit, candidates, _stacklevel=_stacklevel + 1, **kwargs
    )

2754

# Backwards-compatible aliases: both old names are registered as
# alternate names for find_all() through _deprecated_function_alias.
# NOTE(review): the last argument looks like the version in which each
# old name was deprecated -- confirm against the helper.
findAll = _deprecated_function_alias("findAll", "find_all", "4.0.0")
findChildren = _deprecated_function_alias("findChildren", "find_all", "3.0.0")

2757

2758 # Generator methods

@property
def children(self) -> Iterator[PageElement]:
    """Iterate over the elements in ``self.contents``, i.e. the direct
    children of this `PageElement`."""
    return (child for child in self.contents)

2763

@property
def self_and_descendants(self) -> Iterator[PageElement]:
    """Iterate over this `Tag` followed by everything that
    ``self.descendants`` yields.
    """
    below = self.descendants
    return self._self_and(below)

2770

@property
def descendants(self) -> Iterator[PageElement]:
    """Iterate over everything below this `Tag`.

    Elements are produced by starting at the first child and following
    ``next_element`` links until the element after this tag's last
    descendant is reached. Nothing is yielded for a tag without
    contents.
    """
    if len(self.contents) == 0:
        return

    # _last_descendant() can't return None here because accept_self
    # is True. Worst case, it returns this tag itself.
    final = cast(PageElement, self._last_descendant(accept_self=True))
    boundary = final.next_element

    node: _AtMostOneElement = self.contents[0]
    while not (node is boundary or node is None):
        # Look up the successor before yielding, so the walk does not
        # depend on what the consumer does with the yielded node.
        upcoming = node.next_element
        yield node
        node = upcoming

2788

2789 # CSS selector code

def select_one(
    self, selector: str, namespaces: Optional[Dict[str, str]] = None, **kwargs: Any
) -> Optional[Tag]:
    """Perform a CSS selection operation on the current element.

    The call is forwarded to ``self.css.select_one()``.

    :param selector: A CSS selector.

    :param namespaces: A dictionary mapping namespace prefixes
        used in the CSS selector to namespace URIs. By default,
        Beautiful Soup will use the prefixes it encountered while
        parsing the document.

    :param kwargs: Keyword arguments to be passed into Soup Sieve's
        soupsieve.select() method.
    """
    css_api = self.css
    return css_api.select_one(selector, namespaces, **kwargs)

2806

def select(
    self,
    selector: str,
    namespaces: Optional[Dict[str, str]] = None,
    limit: int = 0,
    **kwargs: Any,
) -> ResultSet[Tag]:
    """Perform a CSS selection operation on the current element.

    This uses the SoupSieve library; the call is forwarded to
    ``self.css.select()``.

    :param selector: A string containing a CSS selector.

    :param namespaces: A dictionary mapping namespace prefixes
        used in the CSS selector to namespace URIs. By default,
        Beautiful Soup will use the prefixes it encountered while
        parsing the document.

    :param limit: After finding this number of results, stop looking.

    :param kwargs: Keyword arguments to be passed into SoupSieve's
        soupsieve.select() method.
    """
    css_api = self.css
    return css_api.select(selector, namespaces, limit, **kwargs)

2831

@property
def css(self) -> CSS:
    """Return a new `CSS` object wrapping this element, which serves
    as the interface to the CSS selector API."""
    selector_api = CSS(self)
    return selector_api

2836

2837 # Old names for backwards compatibility

@_deprecated("children", "4.0.0")
def childGenerator(self) -> Iterator[PageElement]:
    """Deprecated generator; returns ``self.children``.

    :meta private:
    """
    direct_children = self.children
    return direct_children

2845

@_deprecated("descendants", "4.0.0")
def recursiveChildGenerator(self) -> Iterator[PageElement]:
    """Deprecated generator; returns ``self.descendants``.

    :meta private:
    """
    everything_below = self.descendants
    return everything_below

2853

@_deprecated("has_attr", "4.0.0")
def has_key(self, key: str) -> bool:
    """Deprecated method; forwards to `has_attr()`.

    The old name was kind of misleading because has_key()
    (attributes) was different from __in__ (contents), and has_key()
    is gone in Python 3, anyway.

    :param key: The attribute name to look for.

    :meta private:
    """
    attribute_present = self.has_attr(key)
    return attribute_present

2864

2865

# Type variable for the element type held by a ResultSet; it is
# bounded by PageElement.
_PageElementT = TypeVar("_PageElementT", bound=PageElement)

2867

2868

class ResultSet(List[_PageElementT], Generic[_PageElementT]):
    """A list of `PageElement` objects gathered as the result of
    matching an :py:class:`ElementFilter` against a parse tree.
    Basically, a list of search results that also remembers the filter
    that produced it.
    """

    # The filter that produced these results, if any.
    source: Optional[ElementFilter]

    def __init__(
        self, source: Optional[ElementFilter], result: Iterable[_PageElementT] = ()
    ) -> None:
        """Create a result set.

        :param source: The filter the results were matched against.
        :param result: The matching elements, used to populate the list.
        """
        super().__init__(result)
        self.source = source

    def __getattr__(self, key: str) -> None:
        """Raise a helpful exception to explain a common code fix.

        Python only calls this for attributes that normal lookup did
        not find, so every such access ends here.

        :param key: The name of the missing attribute.
        :raises AttributeError: Always.
        """
        message = f"""ResultSet object has no attribute "{key}". You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?"""
        raise AttributeError(message)

2888

2889

2890# Now that all the classes used by SoupStrainer have been defined,

2891# import SoupStrainer itself into this module to preserve the

2892# backwards compatibility of anyone who imports

2893# bs4.element.SoupStrainer.

2894from bs4.filter import SoupStrainer # noqa: E402