Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/bs4/element.py: 41%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1104 statements  

1from __future__ import annotations 

2 

3# Use of this source code is governed by the MIT license. 

4__license__ = "MIT" 

5 

6import inspect 

7import re 

8import warnings 

9 

10from bs4.css import CSS 

11from bs4._deprecation import ( 

12 _deprecated, 

13 _deprecated_alias, 

14 _deprecated_function_alias, 

15) 

16from bs4.formatter import ( 

17 Formatter, 

18 HTMLFormatter, 

19 XMLFormatter, 

20) 

21from bs4._warnings import AttributeResemblesVariableWarning 

22 

23from typing import ( 

24 Any, 

25 Callable, 

26 Dict, 

27 Generic, 

28 Iterable, 

29 Iterator, 

30 List, 

31 Mapping, 

32 MutableSequence, 

33 Optional, 

34 Pattern, 

35 Set, 

36 TYPE_CHECKING, 

37 Tuple, 

38 Type, 

39 TypeVar, 

40 Union, 

41 cast, 

42 overload, 

43) 

44from typing_extensions import ( 

45 Self, 

46 TypeAlias, 

47) 

48 

49if TYPE_CHECKING: 

50 from bs4 import BeautifulSoup 

51 from bs4.builder import TreeBuilder 

52 from bs4.filter import ElementFilter 

53 from bs4.formatter import ( 

54 _EntitySubstitutionFunction, 

55 _FormatterOrName, 

56 ) 

57 from bs4._typing import ( 

58 _AtMostOneElement, 

59 _AtMostOneNavigableString, 

60 _AtMostOneTag, 

61 _AttributeValue, 

62 _AttributeValues, 

63 _Encoding, 

64 _InsertableElement, 

65 _OneElement, 

66 _QueryResults, 

67 _RawAttributeValue, 

68 _RawAttributeValues, 

69 _RawOrProcessedAttributeValues, 

70 _SomeNavigableStrings, 

71 _SomeTags, 

72 _StrainableAttribute, 

73 _StrainableAttributes, 

74 _StrainableElement, 

75 _StrainableString, 

76 ) 

77 

# Either a single NavigableString subclass or an iterable of such subclasses.
_OneOrMoreStringTypes: TypeAlias = Union[
    Type["NavigableString"], Iterable[Type["NavigableString"]]
]

# Type of the ``name`` argument of the find_* methods: a strainable
# element or an ElementFilter.
_FindMethodName: TypeAlias = Union["_StrainableElement", "ElementFilter"]
# Same as _FindMethodName, but None is also accepted.
_OptionalFindMethodName: TypeAlias = Optional[_FindMethodName]

84 

# Deprecated module-level attributes, served lazily through the
# module-level __getattr__ hook below.
# See https://peps.python.org/pep-0562/
_deprecated_names = {
    "whitespace_re": "The {name} attribute was deprecated in version 4.7.0. If you need it, make your own copy.",
}
#: :meta private:
_deprecated_whitespace_re: Pattern[str] = re.compile(r"\s+")


def __getattr__(name: str) -> Any:
    """Resolve a deprecated module attribute, warning the caller.

    :param name: The attribute being looked up on this module.
    :return: The module global called ``_deprecated_<name>``.
    :raises AttributeError: If ``name`` is not a known deprecated name.
    """
    if name not in _deprecated_names:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    template = _deprecated_names[name]
    # stacklevel=2 attributes the warning to the code that accessed the name.
    warnings.warn(template.format(name=name), DeprecationWarning, stacklevel=2)
    return globals()[f"_deprecated_{name}"]

101 

102 

#: The encoding used for documents output by Beautiful Soup when the
#: caller does not specify one.
DEFAULT_OUTPUT_ENCODING: str = "utf-8"

#: A regular expression matching a run of non-whitespace characters;
#: it can be used to split a string on whitespace.
nonwhitespace_re: Pattern[str] = re.compile(r"\S+")

#: Encodings that Python recognizes (so `Tag.encode` could
#: theoretically support them) but that XML and HTML do not (so they
#: should never be named as the encoding of an XML or HTML document).
#:
#: When an XML document is encoded in one of these, the XML
#: declaration will not mention an encoding. When an HTML document is
#: encoded in one of these and has a <meta> tag that mentions an
#: encoding, that encoding will be given as the empty string.
#:
#: Source:
#: Python documentation, `Python Specific Encodings <https://docs.python.org/3/library/codecs.html#python-specific-encodings>`_
PYTHON_SPECIFIC_ENCODINGS: Set[_Encoding] = {
    "idna",
    "mbcs",
    "oem",
    "palmos",
    "punycode",
    "raw_unicode_escape",
    "undefined",
    "unicode_escape",
    "raw-unicode-escape",
    "unicode-escape",
    "string-escape",
    "string_escape",
}

139 

140 

class NamespacedAttribute(str):
    """A string naming a namespaced attribute (e.g. the 'xml:lang' in
    'xml:lang="en"') that also records the prefix ('xml'), the local
    name ('lang') and the namespace it was built from.
    """

    prefix: Optional[str]
    name: Optional[str]
    namespace: Optional[str]

    def __new__(
        cls,
        prefix: Optional[str],
        name: Optional[str] = None,
        namespace: Optional[str] = None,
    ) -> Self:
        """Build the attribute string from its parts.

        :param prefix: The namespace prefix, if any.
        :param name: The local name. An empty name is stored as None.
        :param namespace: The namespace, stored unchanged.
        :return: ``prefix`` alone when there is no name, ``name`` alone
            when there is no prefix, otherwise ``prefix:name``.
        """
        # An empty name means the default namespace, whose name "has
        # no value" per https://www.w3.org/TR/xml-names/#defaulting
        name = name or None

        if name is None:
            text = prefix
        elif not prefix:
            # Not really namespaced.
            text = name
        else:
            text = prefix + ":" + name

        instance = str.__new__(cls, text)
        instance.prefix = prefix
        instance.name = name
        instance.namespace = namespace
        return instance

173 

174 

class AttributeValueWithCharsetSubstitution(str):
    """Abstract base for a string that stands in for a character
    encoding named inside an HTML ``<meta>`` tag.

    There is one subclass for each place such an encoding can be
    found: the ``charset`` attribute (`CharsetMetaAttributeValue`) or
    the ``content`` attribute (`ContentMetaAttributeValue`).

    This lets Beautiful Soup replace that part of the HTML file with
    a different encoding when outputting a tree as a string.
    """

    # The original, un-encoded value of the attribute.
    #: :meta private:
    original_value: str

    def substitute_encoding(self, eventual_encoding: str) -> str:
        """Return this value rewritten to mention ``eventual_encoding``.

        Each subclass does whatever is necessary for its own portion
        of an HTML document; this base implementation only raises.

        :param eventual_encoding: The encoding to substitute in.
        :raises NotImplementedError: Always.
        """
        raise NotImplementedError

197 

198 

class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
    """Stand-in for the value of a ``<meta>`` tag's ``charset``
    attribute.

    When Beautiful Soup parses the markup ``<meta charset="utf8">``,
    the value of the ``charset`` attribute becomes one of these objects.

    If the document is later encoded to an encoding other than UTF-8,
    its ``<meta>`` tag will mention the new encoding instead of ``utf8``.
    """

    def __new__(cls, original_value: str) -> Self:
        """Create the value, remembering the text it was built from.

        :param original_value: The attribute value as found in the markup.
        """
        # The original value isn't used for anything here, but it
        # might be useful for the user to know.
        instance = str.__new__(cls, original_value)
        instance.original_value = original_value
        return instance

    def substitute_encoding(self, eventual_encoding: _Encoding = "utf-8") -> str:
        """Return the encoding name to put in the ``charset`` attribute.

        :param eventual_encoding: The encoding the document is being
            encoded to.
        :return: The empty string for an encoding listed in
            `PYTHON_SPECIFIC_ENCODINGS`, otherwise ``eventual_encoding``.
        """
        return "" if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS else eventual_encoding

225 

226 

class AttributeValueList(List[str]):
    """The list type used to hold the values of attributes that can
    have several values (such as HTML's 'class').

    It adds nothing to a regular list. It exists so that you can
    subclass it and pass your subclass to the TreeBuilder constructor
    as attribute_value_list_class, to have your subclass instantiated
    instead.
    """

234 

235 

class AttributeDict(Dict[Any, Any]):
    """Base class for the dictionary that holds a tag's attributes.

    It can be used directly, but it is a regular dict with no special
    logic of its own.
    """

241 

242 

class XMLAttributeDict(AttributeDict):
    """A dictionary for holding a Tag's attributes, which processes
    incoming values for consistency with the XML spec.
    """

    def __setitem__(self, key: str, value: Any) -> None:
        """Store an attribute value, converting common non-string
        values to strings first.

        XML attributes may have "any literal string as a value", so:

        * None is stored as the empty string.
        * An int or float is stored as ``str(value)``.
        * A bool is stored unchanged. XML defines no rules for boolean
          attributes, so the old Beautiful Soup behavior (a bool that
          gets converted to a string on output) is preserved rather
          than guessing what the value should be.
        * Anything else is stored unchanged. Converting _every_ value
          to a plain string would be dangerous, since a value may be a
          more sophisticated string-like object
          (e.g. CharsetMetaAttributeValue).
        """
        if value is None:
            value = ""
        elif isinstance(value, (int, float)) and not isinstance(value, bool):
            # bool is a subclass of int, hence the explicit exclusion.
            value = str(value)

        super().__setitem__(key, value)

273 

274 

class HTMLAttributeDict(AttributeDict):
    """A dictionary for holding a Tag's attributes, which processes
    incoming values for consistency with the HTML spec, which says
    'Attribute values are a mixture of text and character
    references...'

    Basically, this means converting common non-string values into
    strings, like XMLAttributeDict, though HTML also has some rules
    around boolean attributes that XML doesn't have.
    """

    def __setitem__(self, key: str, value: Any) -> None:
        """Set an attribute value, possibly modifying it to comply
        with the HTML spec.

        * ``False`` or ``None`` removes the attribute (if present) and
          stores nothing.
        * ``True`` stores the attribute's own name as its value.
        * An int or float (including ``0`` and ``0.0``) is stored as
          ``str(value)``.
        * Anything else is stored unchanged.
        """
        # Identity checks, not `value in (False, None)`: membership
        # compares with ==, and 0 == False, so a numeric zero would be
        # mistaken for False and the attribute silently deleted.
        if value is False or value is None:
            # 'The values "true" and "false" are not allowed on
            # boolean attributes. To represent a false value, the
            # attribute has to be omitted altogether.'
            if key in self:
                del self[key]
            return
        if isinstance(value, bool):
            # Only True can reach this branch.
            #
            # 'If the [boolean] attribute is present, its value must
            # either be the empty string or a value that is an ASCII
            # case-insensitive match for the attribute's canonical
            # name, with no leading or trailing whitespace.'
            #
            # [fixme] It's not clear whether "canonical name" means
            # fully-qualified name, unqualified name, or (probably
            # not) name with namespace prefix. For now this uses the
            # unqualified name.
            if isinstance(key, NamespacedAttribute):
                value = key.name
            else:
                value = key
        elif isinstance(value, (int, float)):
            # Only numbers are converted; other values may be
            # sophisticated string-like objects that must be kept.
            value = str(value)
        super().__setitem__(key, value)

316 

317 

class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
    """A generic stand-in for the value of a ``<meta>`` tag's ``content``
    attribute.

    When Beautiful Soup parses the markup:
     ``<meta http-equiv="content-type" content="text/html; charset=utf8">``

    The value of the ``content`` attribute will become one of these objects.

    If the document is later encoded to an encoding other than UTF-8, its
    ``<meta>`` tag will mention the new encoding instead of ``utf8``.
    """

    #: Match the 'charset' argument inside the 'content' attribute
    #: of a <meta> tag.
    #: :meta private:
    CHARSET_RE: Pattern[str] = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M)

    def __new__(cls, original_value: str) -> Self:
        """Create the value, remembering the text it was built from.

        :param original_value: The ``content`` attribute value as found
            in the markup.
        """
        # No regex scan is needed here: the value is kept whether or
        # not it contains a charset, and substitute_encoding() does
        # the matching when it is actually required.
        obj = str.__new__(cls, original_value)
        obj.original_value = original_value
        return obj

    def substitute_encoding(self, eventual_encoding: _Encoding = "utf-8") -> str:
        """When an HTML document is being encoded to a given encoding, the
        value of the ``charset=`` in a ``<meta>`` tag's ``content`` becomes
        the name of the encoding.

        :param eventual_encoding: The encoding the document is being
            encoded to.
        :return: The original value with each ``charset=...`` argument
            rewritten to name ``eventual_encoding``, or with those
            arguments removed entirely when the encoding is listed in
            `PYTHON_SPECIFIC_ENCODINGS`.
        """
        if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS:
            return self.CHARSET_RE.sub("", self.original_value)

        def rewrite(match: re.Match[str]) -> str:
            # Group 1 is the "charset=" prefix, including any leading ';'.
            return match.group(1) + eventual_encoding

        return self.CHARSET_RE.sub(rewrite, self.original_value)

354 

355 

356class PageElement(object): 

357 """An abstract class representing a single element in the parse tree. 

358 

359 `NavigableString`, `Tag`, etc. are all subclasses of 

360 `PageElement`. For this reason you'll see a lot of methods that 

361 return `PageElement`, but you'll never see an actual `PageElement` 

362 object. For the most part you can think of `PageElement` as 

363 meaning "a `Tag` or a `NavigableString`." 

364 """ 

365 

366 #: In general, we can't tell just by looking at an element whether 

367 #: it's contained in an XML document or an HTML document. But for 

368 #: `Tag` objects (q.v.) we can store this information at parse time. 

369 #: :meta private: 

370 known_xml: Optional[bool] = None 

371 

372 #: Whether or not this element has been decomposed from the tree 

373 #: it was created in. 

374 _decomposed: bool 

375 

376 parent: Optional[Tag] 

377 next_element: _AtMostOneElement 

378 previous_element: _AtMostOneElement 

379 next_sibling: _AtMostOneElement 

380 previous_sibling: _AtMostOneElement 

381 

382 #: Whether or not this element is hidden from generated output. 

383 #: Only the `BeautifulSoup` object itself is hidden. 

384 hidden: bool = False 

385 

386 def setup( 

387 self, 

388 parent: Optional[Tag] = None, 

389 previous_element: _AtMostOneElement = None, 

390 next_element: _AtMostOneElement = None, 

391 previous_sibling: _AtMostOneElement = None, 

392 next_sibling: _AtMostOneElement = None, 

393 ) -> None: 

394 """Sets up the initial relations between this element and 

395 other elements. 

396 

397 :param parent: The parent of this element. 

398 

399 :param previous_element: The element parsed immediately before 

400 this one. 

401 

402 :param next_element: The element parsed immediately after 

403 this one. 

404 

405 :param previous_sibling: The most recently encountered element 

406 on the same level of the parse tree as this one. 

407 

408 :param previous_sibling: The next element to be encountered 

409 on the same level of the parse tree as this one. 

410 """ 

411 self.parent = parent 

412 

413 self.previous_element = previous_element 

414 if self.previous_element is not None: 

415 self.previous_element.next_element = self 

416 

417 self.next_element = next_element 

418 if self.next_element is not None: 

419 self.next_element.previous_element = self 

420 

421 self.next_sibling = next_sibling 

422 if self.next_sibling is not None: 

423 self.next_sibling.previous_sibling = self 

424 

425 if ( 

426 previous_sibling is None 

427 and self.parent is not None 

428 and self.parent.contents 

429 ): 

430 previous_sibling = self.parent.contents[-1] 

431 

432 self.previous_sibling = previous_sibling 

433 if self.previous_sibling is not None: 

434 self.previous_sibling.next_sibling = self 

435 

def format_string(self, s: str, formatter: Optional[_FormatterOrName]) -> str:
    """Run a string through a formatter's ``substitute`` method.

    :param s: A string.
    :param formatter: A Formatter object, or a string naming one of
        the standard formatters. None means "return ``s`` unchanged".
    :return: The formatted string.
    """
    if formatter is None:
        return s
    if isinstance(formatter, Formatter):
        resolved = formatter
    else:
        resolved = self.formatter_for_name(formatter)
    return resolved.substitute(s)

448 

def formatter_for_name(
    self, formatter_name: Union[_FormatterOrName, _EntitySubstitutionFunction]
) -> Formatter:
    """Look up or create a Formatter for the given identifier,
    if necessary.

    :param formatter_name: Can be a `Formatter` object (returned
        as-is), a callable (used as the ``entity_substitution``
        argument of a new `bs4.formatter.XMLFormatter` or
        `bs4.formatter.HTMLFormatter`), or any other value (used as a
        key into the ``REGISTRY`` of one of those two classes).

    :return: A `Formatter`. The XML class is used when this element
        reports itself as XML through ``_is_xml``, the HTML class
        otherwise.
    """
    if isinstance(formatter_name, Formatter):
        return formatter_name

    formatter_class: type[Formatter] = XMLFormatter if self._is_xml else HTMLFormatter
    if callable(formatter_name):
        return formatter_class(entity_substitution=formatter_name)
    return formatter_class.REGISTRY[formatter_name]

476 

@property
def _is_xml(self) -> bool:
    """Is this element part of an XML tree or an HTML tree?

    Used by formatter_for_name to decide between an XMLFormatter and
    an HTMLFormatter. It can be inefficient, but it should be called
    very rarely.
    """
    known = self.known_xml
    if known is not None:
        # Most of the time this was determined when the document
        # was parsed.
        return known

    # Otherwise, it's likely that this element was created by direct
    # invocation of the constructor from within the user's Python code.
    parent = self.parent
    if parent is not None:
        return parent._is_xml

    # This is the top-level object. It should have .known_xml set
    # from tree creation. If not, take a guess--BS is usually used
    # on HTML markup.
    return getattr(self, "is_xml", False)

499 

# Deprecated camelCase aliases for next_sibling and previous_sibling.
# Each call passes the old name, the current name, and a version
# string (presumably the release in which the old name was deprecated).
nextSibling = _deprecated_alias("nextSibling", "next_sibling", "4.0.0")
previousSibling = _deprecated_alias("previousSibling", "previous_sibling", "4.0.0")

502 

def __deepcopy__(self, memo: Dict[Any, Any], recursive: bool = False) -> Self:
    """Deep-copy hook; this base implementation only raises.

    :raises NotImplementedError: Always.
    """
    raise NotImplementedError

505 

def __copy__(self) -> Self:
    """Return a deep copy of this element.

    A copy of a PageElement can only be a deep copy, because only one
    PageElement can occupy a given place in a parse tree.
    """
    memo: Dict[Any, Any] = {}
    return self.__deepcopy__(memo)

511 

# Default value of the ``types`` argument of the string-gathering
# methods: an empty tuple.
default: Iterable[type[NavigableString]] = ()  #: :meta private:

def _all_strings(
    self, strip: bool = False, types: Iterable[type[NavigableString]] = default
) -> Iterator[str]:
    """Yield all strings of certain classes, possibly stripping them.

    `Tag` and `NavigableString` each implement this differently; this
    base implementation only raises.

    :raises NotImplementedError: Always.
    """
    raise NotImplementedError

522 

@property
def stripped_strings(self) -> Iterator[str]:
    """Yield every interesting string in this PageElement, stripped.

    See `Tag` for information on which strings are considered
    interesting in a given context.
    """
    yield from self._all_strings(True)

533 

def get_text(
    self,
    separator: str = "",
    strip: bool = False,
    types: Iterable[Type[NavigableString]] = default,
) -> str:
    """Join all child strings of this PageElement into one string.

    :param separator: Strings will be concatenated using this separator.

    :param strip: If True, strings will be stripped before being
        concatenated.

    :param types: A tuple of NavigableString subclasses. Any
        strings of a subclass not found in this list will be
        ignored. Although there are exceptions, the default
        behavior in most cases is to consider only NavigableString
        and CData objects. That means no comments, processing
        instructions, etc.

    :return: A string.
    """
    pieces = self._all_strings(strip, types=types)
    return separator.join(pieces)

# camelCase alias for get_text.
getText = get_text

560 

@property
def text(self) -> str:
    """The result of `get_text` called with its default arguments."""
    joined = self.get_text()
    return joined

564 

def replace_with(self, *args: _InsertableElement) -> Self:
    """Swap this `PageElement` out for one or more other elements or
    objects, keeping the rest of the tree the same.

    :param args: The replacements, inserted in order at the position
        this element occupied.
    :return: This `PageElement`, no longer part of the tree.
    :raises ValueError: If this element has no parent, or if one of
        the replacements is this element's parent.
    """
    parent = self.parent
    if parent is None:
        raise ValueError(
            "Cannot replace one element with another when the "
            "element to be replaced is not part of a tree."
        )
    if len(args) == 1 and args[0] is self:
        # Replacing an element with itself is a no-op.
        return self
    for candidate in args:
        if candidate is parent:
            raise ValueError("Cannot replace a Tag with its parent.")

    position = parent.index(self)
    self.extract(_self_index=position)
    for shift, replacement in enumerate(args):
        parent.insert(position + shift, replacement)
    return self

587 

# Deprecated camelCase alias for replace_with. The arguments are the
# old name, the current name, and a version string (presumably the
# release in which the old name was deprecated).
replaceWith = _deprecated_function_alias("replaceWith", "replace_with", "4.0.0")

589 

def wrap(self, wrap_inside: Tag) -> Tag:
    """Put this `PageElement` inside a `Tag`.

    This element is first replaced by ``wrap_inside`` and then
    appended to it.

    :return: ``wrap_inside``, occupying the position in the tree that used
        to be occupied by this object, and with this object now inside it.
    """
    detached = self.replace_with(wrap_inside)
    wrap_inside.append(detached)
    return wrap_inside

599 

def extract(self, _self_index: Optional[int] = None) -> Self:
    """Destructively rip this element out of the tree.

    :param _self_index: The location of this element in its parent's
        .contents, if known. Passing this in allows for a performance
        optimization.

    :return: this `PageElement`, no longer part of the tree.
    """
    parent = self.parent
    if parent is not None:
        if _self_index is None:
            _self_index = parent.index(self)
        del parent.contents[_self_index]

    # Find the two elements that would be next to each other if this
    # element (and any children) hadn't been parsed, and connect them.
    #
    # _last_descendant() is called with its default accept_self=True,
    # so the worst case is that it returns self, never None. The cast
    # records that for the type checker.
    last_child = cast(PageElement, self._last_descendant())
    before = self.previous_element
    after = last_child.next_element

    if before is not after:
        if before is not None:
            before.next_element = after
        if after is not None:
            after.previous_element = before
    self.previous_element = None
    last_child.next_element = None

    self.parent = None

    # Close the gap in the sibling chain the same way.
    older = self.previous_sibling
    younger = self.next_sibling
    if older is not younger:
        if older is not None:
            older.next_sibling = younger
        if younger is not None:
            younger.previous_sibling = older
    self.previous_sibling = self.next_sibling = None
    return self

647 

def decompose(self) -> None:
    """Recursively destroy this `PageElement` and its children.

    The element will be removed from the tree and wiped out; so
    will everything beneath it.

    The behavior of a decomposed `PageElement` is undefined and you
    should never use one for anything, but if you need to *check*
    whether an element has been decomposed, you can use the
    `PageElement.decomposed` property.
    """
    self.extract()
    current: _AtMostOneElement = self
    while current is not None:
        # Grab the successor first: clearing __dict__ erases the link.
        successor = current.next_element
        current.__dict__.clear()
        if isinstance(current, Tag):
            current.name = ""
            current.contents = []
        current._decomposed = True
        current = successor

670 

def _last_descendant(
    self, is_initialized: bool = True, accept_self: bool = True
) -> _AtMostOneElement:
    """Find the last element beneath this object to be parsed.

    Note for type checking: this returns _AtMostOneElement rather
    than PageElement because it returns None in exactly one
    situation, when `accept_self` is False and the `PageElement` has
    no children--either it's a NavigableString or an empty Tag.

    :param is_initialized: Has `PageElement.setup` been called on
        this `PageElement` yet?

    :param accept_self: Is ``self`` an acceptable answer to the
        question?
    """
    # next_sibling is only consulted once setup() has run.
    sibling = self.next_sibling if is_initialized else None
    if sibling is not None:
        # Whatever was parsed just before the next sibling is the
        # last thing beneath this element.
        candidate = sibling.previous_element
    else:
        # Otherwise walk down the last child at every level.
        candidate = self
        while isinstance(candidate, Tag) and candidate.contents:
            candidate = candidate.contents[-1]

    if candidate is self and not accept_self:
        return None
    return candidate

698 

# Deprecated camelCase alias for _last_descendant. The arguments are
# the old name, the current name, and a version string (presumably
# the release in which the old name was deprecated).
_lastRecursiveChild = _deprecated_alias(
    "_lastRecursiveChild", "_last_descendant", "4.0.0"
)

702 

def insert_before(self, *args: _InsertableElement) -> List[PageElement]:
    """Make the given element(s) the immediate predecessor of this one.

    All the elements will have the same `PageElement.parent` as
    this one, and the given elements will occur immediately before
    this one.

    :param args: One or more PageElements.

    :return: The list of PageElements that were inserted.
    :raises ValueError: If this element has no parent, or if it is
        itself one of ``args``.
    """
    parent = self.parent
    if parent is None:
        raise ValueError("Element has no parent, so 'before' has no meaning.")
    for candidate in args:
        if candidate is self:
            raise ValueError("Can't insert an element before itself.")

    inserted: List[PageElement] = []
    for predecessor in args:
        # Extract first so that the index won't be thrown off if the
        # new element is currently a sibling of this one.
        if isinstance(predecessor, PageElement):
            predecessor.extract()
        # Re-read the position every time: each insertion moves self.
        position = parent.index(self)
        inserted.extend(parent.insert(position, predecessor))
    return inserted

729 

def insert_after(self, *args: _InsertableElement) -> List[PageElement]:
    """Makes the given element(s) the immediate successor of this one.

    The elements will have the same `PageElement.parent` as this
    one, and the given elements will occur immediately after this
    one, in the order given.

    :param args: One or more PageElements.

    :return: The list of PageElements that were inserted.
    :raises ValueError: If this element has no parent, or if it is
        itself one of ``args``.
    """
    # Do all error checking before modifying the tree.
    parent = self.parent
    if parent is None:
        raise ValueError("Element has no parent, so 'after' has no meaning.")
    if any(x is self for x in args):
        raise ValueError("Can't insert an element after itself.")

    results: List[PageElement] = []
    for successor in args:
        # Extract first so that the index won't be screwed up if they
        # are siblings.
        if isinstance(successor, PageElement):
            successor.extract()
        index = parent.index(self)
        # Skip past everything inserted so far. parent.insert() returns
        # a collection, so one argument may account for more than one
        # inserted element; counting the elements actually inserted
        # (rather than the arguments processed) keeps later arguments
        # after all of them.
        results.extend(parent.insert(index + 1 + len(results), successor))

    return results

760 

def new_tag(
    self,
    name: str,
    namespace: Optional[str] = None,
    nsprefix: Optional[str] = None,
    attrs: Optional[_RawAttributeValues] = None,
    sourceline: Optional[int] = None,
    sourcepos: Optional[int] = None,
    string: Optional[str] = None,
    **kwattrs: _RawAttributeValue,
) -> Tag:
    """Create a new Tag associated with the same BeautifulSoup object
    as this PageElement.

    All arguments are forwarded unchanged to the ``new_tag`` method of
    the object found through ``_root_object``.

    :raises ValueError: If ``_root_object`` is None.
    """
    soup = self._root_object
    if soup is None:
        raise ValueError("Cannot call new_tag on a PageElement not contained in a BeautifulSoup object")
    return soup.new_tag(
        name, namespace, nsprefix, attrs, sourceline, sourcepos, string, **kwattrs
    )

777 

def new_string(
    self, s: str, subclass: Optional[Type[NavigableString]] = None
) -> NavigableString:
    """Create a new NavigableString associated with the same
    BeautifulSoup object as this PageElement.

    Both arguments are forwarded unchanged to the ``new_string``
    method of the object found through ``_root_object``.

    :raises ValueError: If ``_root_object`` is None.
    """
    soup = self._root_object
    if soup is None:
        raise ValueError("Cannot call new_string on a PageElement not contained in a BeautifulSoup object")
    return soup.new_string(s, subclass)

785 

@property
def _root_object(self) -> Optional[BeautifulSoup]:
    """Find the BeautifulSoup object used to create this PageElement,
    assuming it's still attached.

    Walks up the chain of parents and returns the first ancestor
    whose ``_is_root`` is true, or None if the chain ends first.
    """
    ancestor: Optional[Tag] = self.parent
    while ancestor is not None:
        if ancestor._is_root:
            return cast("BeautifulSoup", ancestor)
        ancestor = ancestor.parent
    return None

795 

@property
def _is_root(self) -> bool:
    """Whether this object is the root of its parse tree.

    Always False here: only a BeautifulSoup object can be the root.
    """
    return False

800 

# Overload: only ``string`` given (no name or attrs) -> a string.
@overload
def find_next(
    self,
    name: None = None,
    attrs: None = None,
    *,
    string: _StrainableString,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneNavigableString:
    ...

# Overload: no ``string`` given -> a tag.
@overload
def find_next(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: None = None,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneTag:
    ...

def find_next(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: Optional[_StrainableString] = None,
    **kwargs: _StrainableAttribute,
) -> Union[_AtMostOneTag, _AtMostOneNavigableString, _AtMostOneElement]:
    """Find the first PageElement that matches the given criteria and
    appears later in the document than this PageElement.

    All find_* methods take a common set of arguments. See the online
    documentation for detailed explanations.

    The search is delegated to ``_find_one``, which is handed
    `find_all_next` as the method to run.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param string: A filter for a NavigableString with specific text.
    :kwargs: Additional filters on attribute values.
    """
    search_method = self.find_all_next
    return self._find_one(search_method, name, attrs, string, **kwargs)

843 

# Deprecated camelCase alias for find_next. The arguments are the old
# name, the current name, and a version string (presumably the
# release in which the old name was deprecated).
findNext = _deprecated_function_alias("findNext", "find_next", "4.0.0")

845 

# Overload: only ``string`` given (no name or attrs) -> strings.
@overload
def find_all_next(
    self,
    name: None = None,
    attrs: None = None,
    *,
    string: _StrainableString,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeNavigableStrings:
    ...

# Overload: no ``string`` given -> tags.
@overload
def find_all_next(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: None = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeTags:
    ...

def find_all_next(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: Optional[_StrainableString] = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> Union[_SomeTags, _SomeNavigableStrings, _QueryResults]:
    """Find all `PageElement` objects that match the given criteria and
    appear later in the document than this `PageElement`.

    All find_* methods take a common set of arguments. See the online
    documentation for detailed explanations.

    The search is delegated to ``_find_all``, which is handed
    ``self.next_elements`` as the elements to examine.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param string: A filter for a NavigableString with specific text.
    :param limit: Stop looking after finding this many results.
    :kwargs: Additional filters on attribute values.
    """
    search = self._find_all
    return search(name, attrs, string, limit, self.next_elements, **kwargs)

899 

# Deprecated camelCase alias for find_all_next, built by
# _deprecated_function_alias.
# NOTE(review): the third argument ("4.0.0") is presumably the version
# in which the alias was deprecated -- confirm against bs4._deprecation.
findAllNext = _deprecated_function_alias("findAllNext", "find_all_next", "4.0.0")

901 

# Overload: `string` alone (keyword-only, no name or attrs) -> string
@overload
def find_next_sibling(
    self,
    name: None = None,
    attrs: None = None,
    *,
    string: _StrainableString,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneNavigableString:
    ...

# Overload: no `string` -> tag
@overload
def find_next_sibling(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: None = None,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneTag:
    ...

def find_next_sibling(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: Optional[_StrainableString] = None,
    **kwargs: _StrainableAttribute,
) -> Union[_AtMostOneTag, _AtMostOneNavigableString, _AtMostOneElement]:
    """Return the nearest later sibling of this PageElement that
    matches the given criteria.

    Implemented by handing `find_next_siblings` to `_find_one`. All
    find_* methods take a common set of arguments; see the online
    documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param string: A filter for a `NavigableString` with specific text.
    :kwargs: Additional filters on attribute values.
    """
    plural_finder = self.find_next_siblings
    return self._find_one(plural_finder, name, attrs, string, **kwargs)

944 

# Deprecated camelCase alias for find_next_sibling, built by
# _deprecated_function_alias.
# NOTE(review): the third argument ("4.0.0") is presumably the version
# in which the alias was deprecated -- confirm against bs4._deprecation.
findNextSibling = _deprecated_function_alias(
    "findNextSibling", "find_next_sibling", "4.0.0"
)

948 

# Overload: `string` alone (keyword-only, no name or attrs) -> strings
@overload
def find_next_siblings(
    self,
    name: None = None,
    attrs: None = None,
    *,
    string: _StrainableString,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeNavigableStrings:
    ...

# Overload: no `string` -> tags
@overload
def find_next_siblings(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: None = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeTags:
    ...

def find_next_siblings(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: Optional[_StrainableString] = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> Union[_SomeTags, _SomeNavigableStrings, _QueryResults]:
    """Collect every later sibling of this `PageElement` that matches
    the given criteria.

    The search runs over `next_siblings` and is delegated to
    `_find_all`. All find_* methods take a common set of arguments;
    see the online documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param string: A filter for a `NavigableString` with specific text.
    :param limit: Stop looking after finding this many results.
    :kwargs: Additional filters on attribute values.
    """
    later_siblings = self.next_siblings
    return self._find_all(name, attrs, string, limit, later_siblings, **kwargs)

1002 

# Two deprecated spellings of find_next_siblings, both built by
# _deprecated_function_alias.
# NOTE(review): the third argument ("4.0.0" / "3.0.0") is presumably the
# version in which each alias was deprecated -- confirm against
# bs4._deprecation.
findNextSiblings = _deprecated_function_alias(
    "findNextSiblings", "find_next_siblings", "4.0.0"
)
fetchNextSiblings = _deprecated_function_alias(
    "fetchNextSiblings", "find_next_siblings", "3.0.0"
)

1009 

# Overload: `string` alone (keyword-only, no name or attrs) -> string
@overload
def find_previous(
    self,
    name: None = None,
    attrs: None = None,
    *,
    string: _StrainableString,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneNavigableString:
    ...

# Overload: no `string` -> tag
@overload
def find_previous(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: None = None,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneTag:
    ...

def find_previous(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: Optional[_StrainableString] = None,
    **kwargs: _StrainableAttribute,
) -> Union[_AtMostOneTag, _AtMostOneNavigableString, _AtMostOneElement]:
    """Search backwards through the document from this `PageElement`
    and return the first `PageElement` matching the given criteria.

    Implemented by handing `find_all_previous` to `_find_one`. All
    find_* methods take a common set of arguments; see the online
    documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param string: A filter for a `NavigableString` with specific text.
    :kwargs: Additional filters on attribute values.
    """
    plural_finder = self.find_all_previous
    return self._find_one(plural_finder, name, attrs, string, **kwargs)

1052 

# Deprecated camelCase alias for find_previous, built by
# _deprecated_function_alias.
# NOTE(review): the third argument ("3.0.0") is presumably the version
# in which the alias was deprecated -- confirm against bs4._deprecation.
findPrevious = _deprecated_function_alias("findPrevious", "find_previous", "3.0.0")

1054 

# Overload: `string` alone (keyword-only, no name or attrs) -> strings
@overload
def find_all_previous(
    self,
    name: None = None,
    attrs: None = None,
    *,
    string: _StrainableString,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeNavigableStrings:
    ...

# Overload: no `string` -> tags
@overload
def find_all_previous(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: None = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeTags:
    ...

def find_all_previous(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: Optional[_StrainableString] = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> Union[_SomeTags, _SomeNavigableStrings, _QueryResults]:
    """Search backwards through the document from this `PageElement`
    and collect every `PageElement` matching the given criteria.

    The search runs over `previous_elements` and is delegated to
    `_find_all`. All find_* methods take a common set of arguments;
    see the online documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param string: A filter for a `NavigableString` with specific text.
    :param limit: Stop looking after finding this many results.
    :kwargs: Additional filters on attribute values.
    """
    earlier_elements = self.previous_elements
    return self._find_all(name, attrs, string, limit, earlier_elements, **kwargs)

1108 

# Two deprecated spellings of find_all_previous, both built by
# _deprecated_function_alias.
# NOTE(review): the third argument ("4.0.0" / "3.0.0") is presumably the
# version in which each alias was deprecated -- confirm against
# bs4._deprecation.
findAllPrevious = _deprecated_function_alias(
    "findAllPrevious", "find_all_previous", "4.0.0"
)
fetchAllPrevious = _deprecated_function_alias(
    "fetchAllPrevious", "find_all_previous", "3.0.0"
)

1115 

# Overload: `string` alone (keyword-only, no name or attrs) -> string
@overload
def find_previous_sibling(
    self,
    name: None = None,
    attrs: None = None,
    *,
    string: _StrainableString,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneNavigableString:
    ...

# Overload: no `string` -> tag
@overload
def find_previous_sibling(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: None = None,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneTag:
    ...

def find_previous_sibling(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: Optional[_StrainableString] = None,
    **kwargs: _StrainableAttribute,
) -> Union[_AtMostOneTag, _AtMostOneNavigableString, _AtMostOneElement]:
    """Return the nearest earlier sibling of this `PageElement` that
    matches the given criteria.

    Implemented by handing `find_previous_siblings` to `_find_one`.
    All find_* methods take a common set of arguments; see the online
    documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param string: A filter for a `NavigableString` with specific text.
    :kwargs: Additional filters on attribute values.
    """
    plural_finder = self.find_previous_siblings
    return self._find_one(plural_finder, name, attrs, string, **kwargs)

1160 

# Deprecated camelCase alias for find_previous_sibling, built by
# _deprecated_function_alias.
# NOTE(review): the third argument ("4.0.0") is presumably the version
# in which the alias was deprecated -- confirm against bs4._deprecation.
findPreviousSibling = _deprecated_function_alias(
    "findPreviousSibling", "find_previous_sibling", "4.0.0"
)

1164 

# Overload: `string` alone (keyword-only, no name or attrs) -> strings
@overload
def find_previous_siblings(
    self,
    name: None = None,
    attrs: None = None,
    *,
    string: _StrainableString,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeNavigableStrings:
    ...

# Overload: no `string` -> tags
@overload
def find_previous_siblings(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: None = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeTags:
    ...

def find_previous_siblings(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    string: Optional[_StrainableString] = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> Union[_SomeTags, _SomeNavigableStrings, _QueryResults]:
    """Collect every earlier sibling of this PageElement that matches
    the given criteria.

    The search runs over `previous_siblings` and is delegated to
    `_find_all`. All find_* methods take a common set of arguments;
    see the online documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param string: A filter for a NavigableString with specific text.
    :param limit: Stop looking after finding this many results.
    :kwargs: Additional filters on attribute values.
    """
    earlier_siblings = self.previous_siblings
    return self._find_all(name, attrs, string, limit, earlier_siblings, **kwargs)

1218 

# Two deprecated spellings of find_previous_siblings, both built by
# _deprecated_function_alias.
# NOTE(review): the third argument ("4.0.0" / "3.0.0") is presumably the
# version in which each alias was deprecated -- confirm against
# bs4._deprecation.
findPreviousSiblings = _deprecated_function_alias(
    "findPreviousSiblings", "find_previous_siblings", "4.0.0"
)
fetchPreviousSiblings = _deprecated_function_alias(
    "fetchPreviousSiblings", "find_previous_siblings", "3.0.0"
)

1225 

def find_parent(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneTag:
    """Return the nearest parent of this PageElement that matches the
    given criteria, or None if no parent matches.

    All find_* methods take a common set of arguments. See the online
    documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :kwargs: Additional filters on attribute values.
    """
    # NOTE: _find_one can't be used here: it passes a `string` argument
    # to the method it wraps, and find_parents doesn't accept one.
    # Ask find_parents for at most one match (limit=1).
    matches = self.find_parents(name, attrs, 1, **kwargs)
    return matches[0] if matches else None

1253 

# Deprecated camelCase alias for find_parent, built by
# _deprecated_function_alias.
# NOTE(review): the third argument ("4.0.0") is presumably the version
# in which the alias was deprecated -- confirm against bs4._deprecation.
findParent = _deprecated_function_alias("findParent", "find_parent", "4.0.0")

1255 

def find_parents(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeTags:
    """Collect every parent of this `PageElement` that matches the
    given criteria.

    The search runs over `parents` and is delegated to `_find_all`,
    with no `string` filter. All find_* methods take a common set of
    arguments; see the online documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param limit: Stop looking after finding this many results.
    :kwargs: Additional filters on attribute values.
    """
    matching_ancestors = self._find_all(
        name, attrs, None, limit, self.parents, **kwargs
    )
    # Only Tags can have children, so every parent found here is a Tag
    # and the result can be narrowed to a ResultSet of Tags.
    return cast(ResultSet[Tag], matching_ancestors)

1279 

# Two deprecated spellings of find_parents, both built by
# _deprecated_function_alias.
# NOTE(review): the third argument ("4.0.0" / "3.0.0") is presumably the
# version in which each alias was deprecated -- confirm against
# bs4._deprecation.
findParents = _deprecated_function_alias("findParents", "find_parents", "4.0.0")
fetchParents = _deprecated_function_alias("fetchParents", "find_parents", "3.0.0")

1282 

@property
def next(self) -> _AtMostOneElement:
    """The `PageElement`, if any, parsed immediately after this one.

    A read-only view of `next_element`.
    """
    following = self.next_element
    return following

1287 

@property
def previous(self) -> _AtMostOneElement:
    """The `PageElement`, if any, parsed immediately before this one.

    A read-only view of `previous_element`.
    """
    preceding = self.previous_element
    return preceding

1292 

1293 # These methods do the real heavy lifting. 

1294 

1295 def _find_one( 

1296 self, 

1297 # TODO-TYPING: "There is no syntax to indicate optional or 

1298 # keyword arguments; such function types are rarely used 

1299 # as callback types." - So, not sure how to get more 

1300 # specific here. 

1301 method: Callable, 

1302 name: _OptionalFindMethodName, 

1303 attrs: Optional[_StrainableAttributes], 

1304 string: Optional[_StrainableString], 

1305 **kwargs: _StrainableAttribute, 

1306 ) -> _AtMostOneElement: 

1307 r: _AtMostOneElement = None 

1308 results: _QueryResults = method(name, attrs, string, 1, **kwargs) 

1309 if results: 

1310 r = results[0] 

1311 return r 

1312 

1313 @property 

1314 def _warning_stack_level(self) -> int: 

1315 """Find the appropriate stack level to use when issuing a warning relating to one of the find* methods.""" 

1316 # The find* methods call each other, which makes it 

1317 # difficult to track how deep we are in the stack 

1318 # vis-a-vis the caller's entry point into the bs4.element 

1319 # module. However, we know that all of the find* methods 

1320 # are in bs4.element, and there's no code in this module 

1321 # that triggers the warnings we need to issue. 

1322 # 

1323 # (There is _test_ code that triggers the warnings, but that's 

1324 # in bs4.tests.) 

1325 # 

1326 # Therefore we can go up the stack until we leave the 

1327 # bs4.element module, and use the distance between here and 

1328 # there as the stacklevel. 

1329 stacklevel = 0 

1330 for frameinfo in inspect.stack(context=0): 

1331 if (frameinfo.frame is not None 

1332 and frameinfo.frame.f_globals is not None 

1333 and frameinfo.frame.f_globals.get('__name__', '') != "bs4.element"): 

1334 break 

1335 stacklevel += 1 

1336 return stacklevel 

1337 

def _find_all(
    self,
    name: _OptionalFindMethodName,
    attrs: Optional[_StrainableAttributes],
    string: Optional[_StrainableString],
    limit: Optional[int],
    generator: Iterator[PageElement],
    **kwargs: _StrainableAttribute,
) -> _QueryResults:
    """Walk a generator of elements and gather the ones that match.

    :param name: A filter on tag name, or an `ElementFilter` to use
        directly as the matcher.
    :param attrs: Additional filters on attribute values.
    :param string: A filter for a NavigableString with specific text.
    :param limit: Stop looking after finding this many results.
    :param generator: The elements to examine.
    :kwargs: Additional filters on attribute values. The deprecated
        'text' keyword is used as `string` when `string` is None.
    """
    # Accept the deprecated 'text' keyword as a stand-in for 'string'.
    if string is None and "text" in kwargs:
        string = kwargs.pop("text")
        warnings.warn(
            "The 'text' argument to find()-type methods is deprecated. Use 'string' instead.",
            DeprecationWarning,
            stacklevel=self._warning_stack_level,
        )

    # '_class' looks like a misspelling of 'class_': warn, but carry on.
    if "_class" in kwargs:
        warnings.warn(
            AttributeResemblesVariableWarning.MESSAGE
            % dict(original="_class", autocorrect="class_"),
            AttributeResemblesVariableWarning,
            stacklevel=self._warning_stack_level,
        )

    from bs4.filter import ElementFilter

    matcher = (
        name
        if isinstance(name, ElementFilter)
        else SoupStrainer(name, attrs, string, **kwargs)
    )

    # The shortcuts below only apply when the tag name is the sole
    # criterion; anything else goes through the matcher.
    if string is not None or limit or attrs or kwargs:
        return matcher.find_all(generator, limit)

    result: MutableSequence[_OneElement]
    if name is True or name is None:
        # Optimization to find all tags.
        result = [candidate for candidate in generator if isinstance(candidate, Tag)]
        return ResultSet(matcher, result)

    if isinstance(name, str):
        # Optimization to find all tags with a given name.
        if name.count(":") == 1:
            # This is a name with a prefix. If this is a namespace-aware
            # document, we need to match the local name against tag.name.
            # If not, we need to match the fully-qualified name against
            # tag.name.
            prefix, _, local_name = name.partition(":")
        else:
            prefix, local_name = None, name
        tags = (candidate for candidate in generator if isinstance(candidate, Tag))
        result = [
            tag
            for tag in tags
            if tag.name == name
            or (
                tag.name == local_name
                and (prefix is None or tag.prefix == prefix)
            )
        ]
        return ResultSet(matcher, result)

    return matcher.find_all(generator, limit)

1402 

# These generators can be used to navigate starting from both
# NavigableStrings and Tags.
@property
def next_elements(self) -> Iterator[PageElement]:
    """Every PageElement that was parsed after this one, in order."""
    node = self.next_element
    while node is not None:
        # Read the forward link before yielding, so the walk continues
        # from the link as it was before the consumer got `node`.
        upcoming = node.next_element
        yield node
        node = upcoming

1413 

@property
def self_and_next_elements(self) -> Iterator[PageElement]:
    """This PageElement, followed by every PageElement parsed after it.

    Built by passing `next_elements` through `_self_and`.
    """
    tail = self.next_elements
    return self._self_and(tail)

1418 

@property
def next_siblings(self) -> Iterator[PageElement]:
    """Every sibling of this PageElement that was parsed later, in
    order.
    """
    node = self.next_sibling
    while node is not None:
        # Read the forward link before yielding, so the walk continues
        # from the link as it was before the consumer got `node`.
        upcoming = node.next_sibling
        yield node
        node = upcoming

1429 

@property
def self_and_next_siblings(self) -> Iterator[PageElement]:
    """This PageElement, followed by each of its later siblings.

    Built by passing `next_siblings` through `_self_and`.
    """
    tail = self.next_siblings
    return self._self_and(tail)

1434 

@property
def previous_elements(self) -> Iterator[PageElement]:
    """Every PageElement that was parsed before this one, nearest
    first.

    :yield: A sequence of PageElements.
    """
    node = self.previous_element
    while node is not None:
        # Read the backward link before yielding, so the walk continues
        # from the link as it was before the consumer got `node`.
        earlier = node.previous_element
        yield node
        node = earlier

1446 

@property
def self_and_previous_elements(self) -> Iterator[PageElement]:
    """This PageElement, followed by every element parsed before it.

    Built by passing `previous_elements` through `_self_and`.
    """
    tail = self.previous_elements
    return self._self_and(tail)

1452 

@property
def previous_siblings(self) -> Iterator[PageElement]:
    """Every sibling of this PageElement that was parsed earlier,
    nearest first.

    :yield: A sequence of PageElements.
    """
    node = self.previous_sibling
    while node is not None:
        # Read the backward link before yielding, so the walk continues
        # from the link as it was before the consumer got `node`.
        earlier = node.previous_sibling
        yield node
        node = earlier

1465 

@property
def self_and_previous_siblings(self) -> Iterator[PageElement]:
    """This PageElement, followed by each sibling parsed before it.

    Built by passing `previous_siblings` through `_self_and`.
    """
    tail = self.previous_siblings
    return self._self_and(tail)

1471 

@property
def parents(self) -> Iterator[Tag]:
    """Every element above this PageElement in the tree, nearest
    first.

    :yield: A sequence of Tags, ending with a BeautifulSoup object.
    """
    ancestor = self.parent
    while ancestor is not None:
        # Read the upward link before yielding, so the walk continues
        # from the link as it was before the consumer got `ancestor`.
        higher = ancestor.parent
        yield ancestor
        ancestor = higher

1483 

@property
def self_and_parents(self) -> Iterator[PageElement]:
    """This element, followed by each of its parents.

    Built by passing `parents` through `_self_and`.

    :yield: A sequence of PageElements, ending with a BeautifulSoup object.
    """
    ancestors = self.parents
    return self._self_and(ancestors)

1491 

1492 def _self_and(self, other_generator:Iterator[PageElement]) -> Iterator[PageElement]: 

1493 """Modify a generator by yielding this element, then everything 

1494 yielded by the other generator. 

1495 """ 

1496 if not self.hidden: 

1497 yield self 

1498 for i in other_generator: 

1499 yield i 

1500 

@property
def decomposed(self) -> bool:
    """Whether this PageElement has been decomposed.

    Reads the `_decomposed` attribute; a missing or falsy value is
    reported as False.
    """
    flag = getattr(self, "_decomposed", False)
    return flag if flag else False

1505 

@_deprecated("next_elements", "4.0.0")
def nextGenerator(self) -> Iterator[PageElement]:
    """Deprecated method form of the `next_elements` property.

    :meta private:
    """
    return self.next_elements

1510 

@_deprecated("next_siblings", "4.0.0")
def nextSiblingGenerator(self) -> Iterator[PageElement]:
    """Deprecated method form of the `next_siblings` property.

    :meta private:
    """
    return self.next_siblings

1515 

@_deprecated("previous_elements", "4.0.0")
def previousGenerator(self) -> Iterator[PageElement]:
    """Deprecated method form of the `previous_elements` property.

    :meta private:
    """
    return self.previous_elements

1520 

@_deprecated("previous_siblings", "4.0.0")
def previousSiblingGenerator(self) -> Iterator[PageElement]:
    """Deprecated method form of the `previous_siblings` property.

    :meta private:
    """
    return self.previous_siblings

1525 

@_deprecated("parents", "4.0.0")
def parentGenerator(self) -> Iterator[PageElement]:
    """Deprecated method form of the `parents` property.

    :meta private:
    """
    return self.parents

1530 

1531 

class NavigableString(str, PageElement):
    """A Python string that also lives in a parse tree.

    For the markup ``<b>penguin</b>``, Beautiful Soup creates a
    `NavigableString` holding the text "penguin".
    """

    #: Text placed in front of the 'real' string when it is formatted
    #: as part of a document -- for example, the '<!--' that opens an
    #: HTML comment.
    PREFIX: str = ""

    #: Text placed after the 'real' string when it is formatted as
    #: part of a document -- for example, the '-->' that closes an
    #: HTML comment.
    SUFFIX: str = ""

    def __new__(cls, value: Union[str, bytes]) -> Self:
        """Build a new NavigableString from text or from encoded bytes.

        When a NavigableString is unpickled, this method receives the
        string encoded in DEFAULT_OUTPUT_ENCODING. That encoding has to
        be handed on to ``str.__new__``, which otherwise won't know how
        to handle non-ASCII characters.
        """
        # A str needs no decoding; anything else is decoded on the way in.
        decode_args = () if isinstance(value, str) else (DEFAULT_OUTPUT_ENCODING,)
        instance = str.__new__(cls, value, *decode_args)
        instance.hidden = False
        instance.setup()
        return instance

    def __deepcopy__(self, memo: Dict[Any, Any], recursive: bool = False) -> Self:
        """Copy this string: same contents and class as the original,
        but built fresh from the text rather than from the tree.

        :param recursive: This parameter is ignored; it's only defined
           so that NavigableString.__deepcopy__ implements the same
           signature as Tag.__deepcopy__.
        """
        own_class = type(self)
        return own_class(self)

    def __getnewargs__(self) -> Tuple[str]:
        """Return the constructor argument: the plain text of this string."""
        plain_text = str(self)
        return (plain_text,)

    # TODO-TYPING This should be SupportsIndex|slice but SupportsIndex
    # is introduced in 3.8. This can be changed once 3.7 support is dropped.
    def __getitem__(self, key: Union[int|slice]) -> str:  # type:ignore
        """Index or slice the string, rejecting string keys.

        :raise TypeError: If `key` is a string, which suggests the
           caller is treating this object like a Tag.
        """
        if not isinstance(key, str):
            return super().__getitem__(key)
        raise TypeError("string indices must be integers, not '{0}'. Are you treating a NavigableString like a Tag?".format(key.__class__.__name__))

    @property
    def string(self) -> str:
        """Convenience property defined to match `Tag.string`.

        :return: Always the `NavigableString` it was called on.

        :meta private:
        """
        return self

    def output_ready(self, formatter: _FormatterOrName = "minimal") -> str:
        """Pass the string through the given formatter and wrap it in
        PREFIX and SUFFIX, ready for output as part of an HTML or XML
        document.

        :param formatter: A `Formatter` object, or a string naming one
            of the standard formatters.
        """
        formatted = self.format_string(self, formatter)
        with_prefix = self.PREFIX + formatted
        return with_prefix + self.SUFFIX

    @property
    def name(self) -> None:
        """Always None: a NavigableString is not a Tag and has no .name.

        The property exists so that code like this doesn't crash when
        run on a mixture of Tag and NavigableString objects:
            [x.name for x in tag.children]

        :meta private:
        """
        return None

    @name.setter
    def name(self, name: str) -> None:
        """Refuse any attempt to set NavigableString.name.

        :raise AttributeError: Always.

        :meta private:
        """
        raise AttributeError("A NavigableString cannot be given a name.")

    def _all_strings(
        self, strip: bool = False, types: _OneOrMoreStringTypes = PageElement.default
    ) -> Iterator[str]:
        """Yield this string if it is of a wanted class, optionally
        stripped.

        Having this method lets NavigableString offer conveniences
        such as get_text(), so that the text-extraction API is
        consistent across all PageElements.

        :param strip: If True, the string is stripped before being
            yielded.

        :param types: A tuple of NavigableString subclasses, or a
            single class. If this NavigableString isn't exactly one of
            them, nothing is yielded. By default, the classes
            considered are NavigableString and CData objects. That
            means no comments, processing instructions, etc.

        :yield: A sequence that either contains this string, or is empty.
        """
        if types is self.default:
            # The default list lives on Tag because it is full of
            # subclasses of this class, which aren't defined until
            # later in the file.
            types = Tag.MAIN_CONTENT_STRING_TYPES

        if types is not None:
            # Compare exact classes rather than using isinstance(self,
            # types): all of these classes subclass NavigableString,
            # and a caller using this feature probably wants generic
            # NavigableStrings but not other stuff.
            own_type = type(self)
            if isinstance(types, type):
                # Looking for a single type.
                wanted = own_type is types
            else:
                # Looking for one of a list of types.
                wanted = own_type in types
            if not wanted:
                return

        text = self.strip() if strip else self
        if len(text) > 0:
            yield text

    @property
    def strings(self) -> Iterator[str]:
        """Yield this string, but only if it is interesting.

        Defined this way for compatibility with `Tag.strings`. See
        `Tag` for information on which strings are interesting in a
        given context.

        :yield: A sequence that either contains this string, or is empty.
        """
        return self._all_strings()

1688 

1689 

class PreformattedString(NavigableString):
    """A `NavigableString` that bypasses the normal formatting rules.

    This is an abstract class used for special kinds of strings such
    as comments (`Comment`) and CDATA blocks (`CData`).
    """

    PREFIX: str = ""
    SUFFIX: str = ""

    def output_ready(self, formatter: Optional[_FormatterOrName] = None) -> str:
        """Prepare this string for output by wrapping it in the
        subclass-specific prefix and suffix.

        :param formatter: A `Formatter` object, or a string naming one
            of the standard formatters. The string will be passed into the
            `Formatter`, but only to trigger any side effects: the return
            value is ignored.

        :return: The string, with any subclass-specific prefix and
            suffix added on.
        """
        run_formatter = formatter is not None
        if run_formatter:
            # Called for side effects only; the output is the raw text.
            self.format_string(self, formatter)
        with_prefix = self.PREFIX + self
        return with_prefix + self.SUFFIX

1715 

1716 

class CData(PreformattedString):
    """A `CDATA section <https://dev.w3.org/html5/spec-LC/syntax.html#cdata-sections>`_.

    `PreformattedString.output_ready` wraps the text in `PREFIX` and
    `SUFFIX`.
    """

    #: Marker that opens a CDATA section.
    PREFIX: str = "<![CDATA["
    #: Marker that closes a CDATA section.
    SUFFIX: str = "]]>"

1722 

1723 

class ProcessingInstruction(PreformattedString):
    """An SGML processing instruction.

    `PreformattedString.output_ready` wraps the text in `PREFIX` and
    `SUFFIX`.
    """

    #: Marker that opens the processing instruction.
    PREFIX: str = "<?"
    #: Marker that closes the processing instruction.
    SUFFIX: str = ">"

1729 

1730 

class XMLProcessingInstruction(ProcessingInstruction):
    """An `XML processing instruction <https://www.w3.org/TR/REC-xml/#sec-pi>`_.

    Differs from `ProcessingInstruction` only in its closing marker,
    '?>' rather than '>'.
    """

    #: Marker that opens the processing instruction.
    PREFIX: str = "<?"
    #: Marker that closes the processing instruction.
    SUFFIX: str = "?>"

1736 

1737 

class Comment(PreformattedString):
    """An `HTML comment <https://dev.w3.org/html5/spec-LC/syntax.html#comments>`_ or `XML comment <https://www.w3.org/TR/REC-xml/#sec-comments>`_.

    `PreformattedString.output_ready` wraps the text in `PREFIX` and
    `SUFFIX`.
    """

    #: Marker that opens a comment.
    PREFIX: str = "<!--"
    #: Marker that closes a comment.
    SUFFIX: str = "-->"

1743 

1744 

class Declaration(PreformattedString):
    """An `XML declaration <https://www.w3.org/TR/REC-xml/#sec-prolog-dtd>`_.

    `PreformattedString.output_ready` wraps the text in `PREFIX` and
    `SUFFIX`.
    """

    #: Marker that opens the declaration.
    PREFIX: str = "<?"
    #: Marker that closes the declaration.
    SUFFIX: str = "?>"

1750 

1751 

class Doctype(PreformattedString):
    """A `document type declaration <https://www.w3.org/TR/REC-xml/#dt-doctype>`_."""

    PREFIX: str = "<!DOCTYPE "
    SUFFIX: str = ">\n"

    @classmethod
    def for_name_and_ids(
        cls, name: str, pub_id: Optional[str], system_id: Optional[str]
    ) -> Doctype:
        """Build the document type declaration that fits a given
        public ID and system ID.

        :param name: The name of the document's root element, e.g. 'html'.
        :param pub_id: The Formal Public Identifier for this document type,
          e.g. '-//W3C//DTD XHTML 1.1//EN'
        :param system_id: The system identifier for this document type,
          e.g. 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        """
        declaration = cls._string_for_name_and_ids(name, pub_id, system_id)
        return Doctype(declaration)

    @classmethod
    def _string_for_name_and_ids(
        cls, name: str, pub_id: Optional[str], system_id: Optional[str]
    ) -> str:
        """Build the text that a Doctype object is created from.

        Kept separate from for_name_and_ids() because the lxml
        TreeBuilder needs to call it.

        :return: The name (or "" when the name is empty), followed by
          a PUBLIC clause when `pub_id` is given, or by a SYSTEM
          clause when only `system_id` is given.
        """
        value = name or ""
        if pub_id is None:
            # No public ID: the system ID, if any, stands alone.
            if system_id is not None:
                value += ' SYSTEM "%s"' % system_id
            return value
        # Public ID first, then the system ID as a bare quoted string.
        value += ' PUBLIC "%s"' % pub_id
        if system_id is not None:
            value += ' "%s"' % system_id
        return value

1790 

1791 

class Stylesheet(NavigableString):
    """A `NavigableString` representing the contents of a `<style> HTML
    tag <https://dev.w3.org/html5/spec-LC/Overview.html#the-style-element>`_
    (probably CSS).

    Used to distinguish embedded stylesheets from textual content. The
    class adds no behavior of its own to `NavigableString`.
    """

1799 

1800 

class Script(NavigableString):
    """A `NavigableString` representing the contents of a `<script>
    HTML tag
    <https://dev.w3.org/html5/spec-LC/Overview.html#the-script-element>`_
    (probably Javascript).

    Used to distinguish executable code from textual content. The
    class adds no behavior of its own to `NavigableString`.
    """

1809 

1810 

class TemplateString(NavigableString):
    """A `NavigableString` representing a string found inside an `HTML
    <template> tag <https://html.spec.whatwg.org/multipage/scripting.html#the-template-element>`_
    embedded in a larger document.

    Used to distinguish such strings from the main body of the document.
    The class adds no behavior of its own to `NavigableString`.
    """

1818 

1819 

class RubyTextString(NavigableString):
    """The text found inside an `<rt> HTML tag
    <https://dev.w3.org/html5/spec-LC/text-level-semantics.html#the-rt-element>`_.

    Having a dedicated `NavigableString` subclass makes it possible to
    tell these annotation strings apart from the strings they annotate.
    """

1827 

1828 

class RubyParenthesisString(NavigableString):
    """The text found inside an `<rp> HTML tag
    <https://dev.w3.org/html5/spec-LC/text-level-semantics.html#the-rp-element>`_,
    kept as its own `NavigableString` subclass.
    """

1833 

1834 

1835class Tag(PageElement): 

1836 """An HTML or XML tag that is part of a parse tree, along with its 

1837 attributes, contents, and relationships to other parts of the tree. 

1838 

1839 When Beautiful Soup parses the markup ``<b>penguin</b>``, it will 

1840 create a `Tag` object representing the ``<b>`` tag. You can 

1841 instantiate `Tag` objects directly, but it's not necessary unless 

1842 you're adding entirely new markup to a parsed document. Most of 

1843 the constructor arguments are intended for use by the `TreeBuilder` 

1844 that's parsing a document. 

1845 

1846 :param parser: A `BeautifulSoup` object representing the parse tree this 

1847 `Tag` will be part of. 

1848 :param builder: The `TreeBuilder` being used to build the tree. 

1849 :param name: The name of the tag. 

1850 :param namespace: The URI of this tag's XML namespace, if any. 

1851 :param prefix: The prefix for this tag's XML namespace, if any. 

1852 :param attrs: A dictionary of attribute values. 

1853 :param parent: The `Tag` to use as the parent of this `Tag`. May be 

1854 the `BeautifulSoup` object itself. 

1855 :param previous: The `PageElement` that was parsed immediately before 

1856 parsing this tag. 

1857 :param is_xml: If True, this is an XML tag. Otherwise, this is an 

1858 HTML tag. 

1859 :param sourceline: The line number where this tag was found in its 

1860 source document. 

1861 :param sourcepos: The character position within ``sourceline`` where this 

1862 tag was found. 

1863 :param can_be_empty_element: If True, this tag should be 

1864 represented as <tag/>. If False, this tag should be represented 

1865 as <tag></tag>. 

1866 :param cdata_list_attributes: A dictionary of attributes whose values should 

1867 be parsed as lists of strings if they ever show up on this tag. 

1868 :param preserve_whitespace_tags: Names of tags whose contents 

1869 should have their whitespace preserved if they are encountered inside 

1870 this tag. 

1871 :param interesting_string_types: When iterating over this tag's 

1872 string contents in methods like `Tag.strings` or 

1873 `PageElement.get_text`, these are the types of strings that are 

1874 interesting enough to be considered. By default, 

1875 `NavigableString` (normal strings) and `CData` (CDATA 

1876 sections) are the only interesting string subtypes. 

1877 :param namespaces: A dictionary mapping currently active 

1878 namespace prefixes to URIs, as of the point in the parsing process when 

1879 this tag was encountered. This can be used later to 

1880 construct CSS selectors. 

1881 

1882 """ 

1883 

1884 def __init__( 

1885 self, 

1886 parser: Optional[BeautifulSoup] = None, 

1887 builder: Optional[TreeBuilder] = None, 

1888 name: Optional[str] = None, 

1889 namespace: Optional[str] = None, 

1890 prefix: Optional[str] = None, 

1891 attrs: Optional[_RawOrProcessedAttributeValues] = None, 

1892 parent: Optional[Union[BeautifulSoup, Tag]] = None, 

1893 previous: _AtMostOneElement = None, 

1894 is_xml: Optional[bool] = None, 

1895 sourceline: Optional[int] = None, 

1896 sourcepos: Optional[int] = None, 

1897 can_be_empty_element: Optional[bool] = None, 

1898 cdata_list_attributes: Optional[Dict[str, Set[str]]] = None, 

1899 preserve_whitespace_tags: Optional[Set[str]] = None, 

1900 interesting_string_types: Optional[Set[Type[NavigableString]]] = None, 

1901 namespaces: Optional[Dict[str, str]] = None, 

1902 # NOTE: Any new arguments here need to be mirrored in 

1903 # Tag.copy_self, and potentially BeautifulSoup.new_tag 

1904 # as well. 

1905 ): 

1906 if parser is None: 

1907 self.parser_class = None 

1908 else: 

1909 # We don't actually store the parser object: that lets extracted 

1910 # chunks be garbage-collected. 

1911 self.parser_class = parser.__class__ 

1912 if name is None: 

1913 raise ValueError("No value provided for new tag's name.") 

1914 self.name = name 

1915 self.namespace = namespace 

1916 self._namespaces = namespaces or {} 

1917 self.prefix = prefix 

1918 if (not builder or builder.store_line_numbers) and ( 

1919 sourceline is not None or sourcepos is not None 

1920 ): 

1921 self.sourceline = sourceline 

1922 self.sourcepos = sourcepos 

1923 else: 

1924 self.sourceline = sourceline 

1925 self.sourcepos = sourcepos 

1926 

1927 attr_dict_class: type[AttributeDict] 

1928 attribute_value_list_class: type[AttributeValueList] 

1929 if builder is None: 

1930 if is_xml: 

1931 attr_dict_class = XMLAttributeDict 

1932 else: 

1933 attr_dict_class = HTMLAttributeDict 

1934 attribute_value_list_class = AttributeValueList 

1935 else: 

1936 attr_dict_class = builder.attribute_dict_class 

1937 attribute_value_list_class = builder.attribute_value_list_class 

1938 self.attribute_value_list_class = attribute_value_list_class 

1939 

1940 if attrs is None: 

1941 self.attrs = attr_dict_class() 

1942 else: 

1943 if builder is not None and builder.cdata_list_attributes: 

1944 self.attrs = builder._replace_cdata_list_attribute_values( 

1945 self.name, attrs 

1946 ) 

1947 else: 

1948 self.attrs = attr_dict_class() 

1949 # Make sure that the values of any multi-valued 

1950 # attributes (e.g. when a Tag is copied) are stored in 

1951 # new lists. 

1952 for k, v in attrs.items(): 

1953 if isinstance(v, list): 

1954 v = v.__class__(v) 

1955 self.attrs[k] = v 

1956 

1957 # If possible, determine ahead of time whether this tag is an 

1958 # XML tag. 

1959 if builder: 

1960 self.known_xml = builder.is_xml 

1961 else: 

1962 self.known_xml = is_xml 

1963 self.contents: List[PageElement] = [] 

1964 self.setup(parent, previous) 

1965 self.hidden = False 

1966 

1967 if builder is None: 

1968 # In the absence of a TreeBuilder, use whatever values were 

1969 # passed in here. They're probably None, unless this is a copy of some 

1970 # other tag. 

1971 self.can_be_empty_element = can_be_empty_element 

1972 self.cdata_list_attributes = cdata_list_attributes 

1973 self.preserve_whitespace_tags = preserve_whitespace_tags 

1974 self.interesting_string_types = interesting_string_types 

1975 else: 

1976 # Set up any substitutions for this tag, such as the charset in a META tag. 

1977 self.attribute_value_list_class = builder.attribute_value_list_class 

1978 builder.set_up_substitutions(self) 

1979 

1980 # Ask the TreeBuilder whether this tag might be an empty-element tag. 

1981 self.can_be_empty_element = builder.can_be_empty_element(name) 

1982 

1983 # Keep track of the list of attributes of this tag that 

1984 # might need to be treated as a list. 

1985 # 

1986 # For performance reasons, we store the whole data structure 

1987 # rather than asking the question of every tag. Asking would 

1988 # require building a new data structure every time, and 

1989 # (unlike can_be_empty_element), we almost never need 

1990 # to check this. 

1991 self.cdata_list_attributes = builder.cdata_list_attributes 

1992 

1993 # Keep track of the names that might cause this tag to be treated as a 

1994 # whitespace-preserved tag. 

1995 self.preserve_whitespace_tags = builder.preserve_whitespace_tags 

1996 

1997 if self.name in builder.string_containers: 

1998 # This sort of tag uses a special string container 

1999 # subclass for most of its strings. We need to be able 

2000 # to look up the proper container subclass. 

2001 self.interesting_string_types = {builder.string_containers[self.name]} 

2002 else: 

2003 self.interesting_string_types = self.MAIN_CONTENT_STRING_TYPES 

2004 

2005 parser_class: Optional[type[BeautifulSoup]] 

2006 name: str 

2007 namespace: Optional[str] 

2008 prefix: Optional[str] 

2009 attrs: _AttributeValues 

2010 sourceline: Optional[int] 

2011 sourcepos: Optional[int] 

2012 known_xml: Optional[bool] 

2013 contents: List[PageElement] 

2014 hidden: bool 

2015 interesting_string_types: Optional[Set[Type[NavigableString]]] 

2016 

2017 can_be_empty_element: Optional[bool] 

2018 cdata_list_attributes: Optional[Dict[str, Set[str]]] 

2019 preserve_whitespace_tags: Optional[Set[str]] 

2020 

2021 #: :meta private: 

2022 parserClass = _deprecated_alias("parserClass", "parser_class", "4.0.0") 

2023 

2024 def __deepcopy__(self, memo: Dict[Any, Any], recursive: bool = True) -> Self: 

2025 """A deepcopy of a Tag is a new Tag, unconnected to the parse tree. 

2026 Its contents are a copy of the old Tag's contents. 

2027 """ 

2028 clone = self.copy_self() 

2029 

2030 if recursive: 

2031 # Clone this tag's descendants recursively, but without 

2032 # making any recursive function calls. 

2033 tag_stack: List[Tag] = [clone] 

2034 for event, element in self._event_stream(self.descendants): 

2035 if event is Tag.END_ELEMENT_EVENT: 

2036 # Stop appending incoming Tags to the Tag that was 

2037 # just closed. 

2038 tag_stack.pop() 

2039 else: 

2040 descendant_clone = element.__deepcopy__(memo, recursive=False) 

2041 # Add to its parent's .contents 

2042 tag_stack[-1].append(descendant_clone) 

2043 

2044 if event is Tag.START_ELEMENT_EVENT: 

2045 # Add the Tag itself to the stack so that its 

2046 # children will be .appended to it. 

2047 tag_stack.append(cast(Tag, descendant_clone)) 

2048 return clone 

2049 

def copy_self(self) -> Self:
    """Build an empty duplicate of this Tag.

    The duplicate has the same name, namespace, prefix, attributes and
    configuration as this Tag, but no contents, and it is not attached
    to any parse tree. Deep-copying a Tag starts with this step, but
    the method is also useful by itself when the contents of the Tag
    should not be copied.
    """
    constructor = type(self)
    # No parser and no builder: everything the new object needs is
    # passed in explicitly.
    duplicate = constructor(
        None,
        None,
        self.name,
        self.namespace,
        self.prefix,
        self.attrs,
        is_xml=self._is_xml,
        sourceline=self.sourceline,
        sourcepos=self.sourcepos,
        can_be_empty_element=self.can_be_empty_element,
        cdata_list_attributes=self.cdata_list_attributes,
        preserve_whitespace_tags=self.preserve_whitespace_tags,
        interesting_string_types=self.interesting_string_types,
        namespaces=self._namespaces,
    )
    # Carry over state that is set after construction.
    duplicate.can_be_empty_element = self.can_be_empty_element
    duplicate.hidden = self.hidden
    return duplicate

2077 

@property
def is_empty_element(self) -> bool:
    """Whether this tag is an empty-element (self-closing) tag.

    Two things must both hold:

    * The tag has no contents. A tag with contents is never an
      empty-element tag.
    * ``can_be_empty_element`` is exactly ``True``. That value comes
      from the `TreeBuilder` that created the tag. A builder with a
      designated list of empty-element tags (the usual case for HTML)
      only allows tags named in that list; a builder without such a
      list (the usual case for XML) allows any tag with no contents.
    """
    if self.contents:
        return False
    return self.can_be_empty_element is True

2096 

@_deprecated("is_empty_element", "4.0.0")
def isSelfClosing(self) -> bool:
    ": :meta private:"
    # Deprecated spelling; the answer comes from is_empty_element.
    empty: bool = self.is_empty_element
    return empty

2101 

@property
def string(self) -> Optional[str]:
    """The single string inside this `Tag`, if there is exactly one.

    :return: ``None`` unless this `Tag` has exactly one child. If
        that child is a `NavigableString`, the child itself is
        returned. If that child is a `Tag`, the result is that child's
        own `Tag.string`, so the lookup continues downwards.

    If this property unexpectedly gives you ``None``, the most likely
    reason is that your `Tag` contains more than one thing.
    """
    children = self.contents
    if len(children) != 1:
        return None
    only_child = children[0]
    if isinstance(only_child, NavigableString):
        return only_child
    if isinstance(only_child, Tag):
        # Keep looking inside the one and only child tag.
        return only_child.string
    return None

@string.setter
def string(self, string: str) -> None:
    """Discard the current `Tag.contents` and put one string in their place."""
    self.clear()
    # A NavigableString (or a subclass) keeps its own class; a plain
    # str becomes a NavigableString.
    string_class = (
        string.__class__ if isinstance(string, NavigableString) else NavigableString
    )
    self.append(string_class(string))

2135 

2136 #: :meta private: 

2137 MAIN_CONTENT_STRING_TYPES = {NavigableString, CData} 

2138 

def _all_strings(
    self, strip: bool = False, types: _OneOrMoreStringTypes = PageElement.default
) -> Iterator[str]:
    """Generate the strings beneath this tag that have an acceptable class.

    :param strip: If True, each string is stripped before it is
        yielded, and strings that are empty after stripping are
        left out.

    :param types: Either a single NavigableString subclass or a
        collection of them. A string is only yielded if its exact
        class is the given class, or is found in the given collection.
        When this is left at its default, self.interesting_string_types
        is used; if that is None, only NavigableString and CData
        objects are considered, which leaves out comments,
        processing instructions, etc.
    """
    if types is self.default:
        types = self.interesting_string_types
        if types is None:
            types = self.MAIN_CONTENT_STRING_TYPES

    # A lone class is compared by identity; a collection by membership.
    single_class = isinstance(types, type)

    for node in self.descendants:
        if not isinstance(node, NavigableString):
            continue
        node_class = type(node)
        if single_class:
            if node_class is not types:
                # Not a kind of string we care about.
                continue
        elif types is not None and node_class not in types:
            # Not a kind of string we care about.
            continue

        if not strip:
            yield node
            continue
        text = node.strip()
        if len(text) != 0:
            yield text

2179 

2180 strings = property(_all_strings) 

2181 

def insert(self, position: int, *new_children: _InsertableElement) -> List[PageElement]:
    """Insert one or more new PageElements as a child of this `Tag`.

    This works similarly to :py:meth:`list.insert`, except you can insert
    multiple elements at once.

    :param position: The numeric position that should be occupied
       in this Tag's `Tag.children` by the first new `PageElement`.

    :param new_children: The PageElements to insert.

    :return: The newly inserted PageElements.
    """
    inserted: List[PageElement] = []
    for new_child in new_children:
        just_inserted = self._insert(position, new_child)
        inserted.extend(just_inserted)
        # A single argument can account for several elements (a
        # BeautifulSoup object contributes all of its children) or
        # for none (an empty BeautifulSoup object). Move past exactly
        # what was reported as inserted, so that the next argument
        # lands after it rather than in the middle of it.
        position += len(just_inserted)
    return inserted

2200 

2201 def _insert(self, position: int, new_child: _InsertableElement) -> List[PageElement]: 

2202 if new_child is None: 

2203 raise ValueError("Cannot insert None into a tag.") 

2204 if new_child is self: 

2205 raise ValueError("Cannot insert a tag into itself.") 

2206 if isinstance(new_child, str) and not isinstance(new_child, NavigableString): 

2207 new_child = NavigableString(new_child) 

2208 

2209 from bs4 import BeautifulSoup 

2210 if isinstance(new_child, BeautifulSoup): 

2211 # We don't want to end up with a situation where one BeautifulSoup 

2212 # object contains another. Insert the BeautifulSoup's children and 

2213 # return them. 

2214 return self.insert(position, *list(new_child.contents)) 

2215 position = min(position, len(self.contents)) 

2216 if hasattr(new_child, "parent") and new_child.parent is not None: 

2217 # We're 'inserting' an element that's already one 

2218 # of this object's children. 

2219 if new_child.parent is self: 

2220 current_index = self.index(new_child) 

2221 if current_index < position: 

2222 # We're moving this element further down the list 

2223 # of this object's children. That means that when 

2224 # we extract this element, our target index will 

2225 # jump down one. 

2226 position -= 1 

2227 elif current_index == position: 

2228 # We're 'inserting' an element into its current location. 

2229 # This is a no-op. 

2230 return [new_child] 

2231 new_child.extract() 

2232 

2233 new_child.parent = self 

2234 previous_child = None 

2235 if position == 0: 

2236 new_child.previous_sibling = None 

2237 new_child.previous_element = self 

2238 else: 

2239 previous_child = self.contents[position - 1] 

2240 new_child.previous_sibling = previous_child 

2241 new_child.previous_sibling.next_sibling = new_child 

2242 new_child.previous_element = previous_child._last_descendant(False) 

2243 if new_child.previous_element is not None: 

2244 new_child.previous_element.next_element = new_child 

2245 

2246 new_childs_last_element = new_child._last_descendant( 

2247 is_initialized=False, accept_self=True 

2248 ) 

2249 # new_childs_last_element can't be None because we passed 

2250 # accept_self=True into _last_descendant. Worst case, 

2251 # new_childs_last_element will be new_child itself. Making 

2252 # this cast removes several mypy complaints later on as we 

2253 # manipulate new_childs_last_element. 

2254 new_childs_last_element = cast(PageElement, new_childs_last_element) 

2255 

2256 if position >= len(self.contents): 

2257 new_child.next_sibling = None 

2258 

2259 parent: Optional[Tag] = self 

2260 parents_next_sibling = None 

2261 while parents_next_sibling is None and parent is not None: 

2262 parents_next_sibling = parent.next_sibling 

2263 parent = parent.parent 

2264 if parents_next_sibling is not None: 

2265 # We found the element that comes next in the document. 

2266 break 

2267 if parents_next_sibling is not None: 

2268 new_childs_last_element.next_element = parents_next_sibling 

2269 else: 

2270 # The last element of this tag is the last element in 

2271 # the document. 

2272 new_childs_last_element.next_element = None 

2273 else: 

2274 next_child = self.contents[position] 

2275 new_child.next_sibling = next_child 

2276 if new_child.next_sibling is not None: 

2277 new_child.next_sibling.previous_sibling = new_child 

2278 new_childs_last_element.next_element = next_child 

2279 

2280 if new_childs_last_element.next_element is not None: 

2281 new_childs_last_element.next_element.previous_element = ( 

2282 new_childs_last_element 

2283 ) 

2284 self.contents.insert(position, new_child) 

2285 

2286 return [new_child] 

2287 

def unwrap(self) -> Self:
    """Remove this `PageElement` from the tree, leaving its contents
    in the place it used to occupy.

    :return: This object, no longer part of the tree.
    :raise ValueError: If this element has no parent.
    """
    container = self.parent
    if container is None:
        raise ValueError(
            "Cannot replace an element with its contents when that "
            "element is not part of a tree."
        )
    slot = container.index(self)
    self.extract(_self_index=slot)
    # Every child goes in at the same index, so walking a snapshot of
    # the children back to front leaves them in their original order.
    for child in self.contents[::-1]:
        container.insert(slot, child)
    return self

2304 

2305 replace_with_children = unwrap 

2306 

@_deprecated("unwrap", "4.0.0")
def replaceWithChildren(self) -> _OneElement:
    ": :meta private:"
    # Deprecated spelling; all the work is done by unwrap().
    unwrapped = self.unwrap()
    return unwrapped

2311 

def append(self, tag: _InsertableElement) -> PageElement|List[PageElement]:
    """Add the given `PageElement` after the current contents of this `Tag`.

    :param tag: A PageElement. One BeautifulSoup object can't contain
        another one, so if this is a BeautifulSoup object, everything
        inside it is inserted into this `Tag` instead.

    :return: The object that was appended. If `tag` was a BeautifulSoup
        object, a list of all the objects that were appended.
    """
    end = len(self.contents)
    appended = self.insert(end, tag)
    # TODO: can't reference BeautifulSoup class in this module
    came_from_soup = isinstance(tag, Tag) and tag.name == "[document]"
    return appended if came_from_soup else appended[0]

2328 

def extend(self, tags: Union[Iterable[_InsertableElement], Tag]) -> List[PageElement]:
    """Appends one or more objects to the contents of this
    `Tag`.

    :param tags: If a list of `PageElement` objects is provided,
        they will be appended to this tag's contents, one at a time.
        If a single `Tag` is provided, its `Tag.contents` will be
        used to extend this object's `Tag.contents`.

    :return: The list of PageElements that were appended.
    :raise TypeError: If ``tags`` is not a `Tag`, a `PageElement`, a
        string, or something that can be iterated over.
    """
    tag_list: Iterable[_InsertableElement]

    if isinstance(tags, Tag):
        tag_list = list(tags.contents)
    elif isinstance(tags, (PageElement, str)):
        # The caller should really be using append() instead,
        # but we can make it work.
        warnings.warn(
            "A single non-Tag item was passed into Tag.extend. Use Tag.append instead.",
            UserWarning,
            stacklevel=2,
        )
        if isinstance(tags, str) and not isinstance(tags, PageElement):
            tags = NavigableString(tags)
        tag_list = [tags]
    else:
        # Moving items around the tree may change their position in
        # the original list. Make a list that won't change.
        #
        # list() is called unconditionally so that a value that can't
        # be iterated over fails here with a TypeError, as it would
        # with list.extend(), rather than leaving tag_list unassigned.
        tag_list = list(tags)

    results: List[PageElement] = []
    for tag in tag_list:
        appended = self.append(tag)
        if isinstance(appended, list):
            # This can happen if you pass in a mixture of Tag and BeautifulSoup objects.
            results.extend(appended)
        else:
            results.append(appended)

    return results

2370 

def clear(self, decompose: bool = False) -> None:
    """Remove every child of this `Tag`.

    :param decompose: If this is False, the default, each child is
        removed by calling its `PageElement.extract` method. If this
        is True, `PageElement.decompose` (a more destructive method)
        is called on each child instead.
    """
    # Work on a snapshot of the child list rather than on the list
    # itself.
    children = list(self.contents)
    if decompose:
        for child in children:
            child.decompose()
    else:
        for child in children:
            child.extract()

2384 

def smooth(self) -> None:
    """Consolidate consecutive strings among the children of this
    `Tag`, and do the same inside every child `Tag`.

    After a lot of operations that modify the tree, calling this
    method can make pretty-printed output look more natural.
    """

    def mergeable(element: PageElement) -> bool:
        # Only strings are merged, and preformatted ones are left alone.
        return isinstance(element, NavigableString) and not isinstance(
            element, PreformattedString
        )

    # Record the index of the first member of every adjacent pair that
    # has to be merged. This avoids copying self.contents, since in
    # most cases very few strings are affected.
    merge_points = []
    for position, current in enumerate(self.contents):
        if isinstance(current, Tag):
            # Child tags get smoothed too.
            current.smooth()
        if position == len(self.contents) - 1:
            # The last child has nothing after it to merge with.
            continue
        if mergeable(current) and mergeable(self.contents[position + 1]):
            merge_points.append(position)

    # Merge from the back, so that removing an item from .contents
    # doesn't invalidate the positions still to be handled.
    for position in reversed(merge_points):
        first = cast(NavigableString, self.contents[position])
        second = cast(NavigableString, self.contents[position + 1])
        second.extract()
        first.replace_with(NavigableString(first + second))

2424 

def index(self, element: PageElement) -> int:
    """Return the position of a child within this `Tag`'s contents.

    Children are compared by identity, not by value, so two children
    that are equal as strings are never mistaken for each other.

    :param element: The `PageElement` to look for in this object's contents.
    :raise ValueError: If ``element`` is not one of this object's children.
    """
    found = next(
        (
            position
            for position, child in enumerate(self.contents)
            if child is element
        ),
        None,
    )
    if found is None:
        raise ValueError("Tag.index: element not in tag")
    return found

2437 

def get(
    self, key: str, default: Optional[_AttributeValue] = None
) -> Optional[_AttributeValue]:
    """Look up an attribute of this tag without raising an exception.

    :param key: The name of the attribute.
    :param default: What to return if this `Tag` has no attribute
        with that name.
    :return: The attribute's value, or ``default``.
    """
    attributes = self.attrs
    return attributes.get(key, default)

2450 

def get_attribute_list(
    self, key: str, default: Optional[AttributeValueList] = None
) -> AttributeValueList:
    """Like get(), except that the result is always a list, which may
    be empty.

    :param key: The name of the attribute.
    :param default: What to use if this `Tag` has no attribute with
        that name.
    :return: The value itself if it is already a list. Otherwise a new
        list that is empty (no value) or holds the one value.
    """
    found = self.get(key, default)
    if found is None:
        return self.attribute_value_list_class()
    if isinstance(found, list):
        # Already a list: hand it back as it is.
        return found
    return self.attribute_value_list_class([found])

2473 

def has_attr(self, key: str) -> bool:
    """Report whether an attribute named ``key`` is set on this `Tag`."""
    attributes = self.attrs
    return key in attributes

2477 

2478 def __hash__(self) -> int: 

2479 return str(self).__hash__() 

2480 

2481 def __getitem__(self, key: str) -> _AttributeValue: 

2482 """tag[key] returns the value of the 'key' attribute for the Tag, 

2483 and throws an exception if it's not there.""" 

2484 return self.attrs[key] 

2485 

2486 def __iter__(self) -> Iterator[PageElement]: 

2487 "Iterating over a Tag iterates over its contents." 

2488 return iter(self.contents) 

2489 

2490 def __len__(self) -> int: 

2491 "The length of a Tag is the length of its list of contents." 

2492 return len(self.contents) 

2493 

2494 def __contains__(self, x: Any) -> bool: 

2495 return x in self.contents 

2496 

2497 def __bool__(self) -> bool: 

2498 "A tag is non-None even if it has no contents." 

2499 return True 

2500 

2501 def __setitem__(self, key: str, value: _AttributeValue) -> None: 

2502 """Setting tag[key] sets the value of the 'key' attribute for the 

2503 tag.""" 

2504 self.attrs[key] = value 

2505 

2506 def __delitem__(self, key: str) -> None: 

2507 "Deleting tag[key] deletes all 'key' attributes for the tag." 

2508 self.attrs.pop(key, None) 

2509 

2510 # Since Tag.__call__ is effectively the same as PageElement.find_all, see find_all for notes 

2511 # on these overloads. 

2512 

2513 @overload 

2514 def __call__( 

2515 self, 

2516 name: None = None, 

2517 attrs: None = None, 

2518 recursive: bool = True, 

2519 *, 

2520 string: _StrainableString, 

2521 limit: Optional[int] = None, 

2522 **kwargs: _StrainableAttribute, 

2523 ) -> _SomeNavigableStrings: 

2524 ... 

2525 

2526 @overload 

2527 def __call__( 

2528 self, 

2529 name: None = None, 

2530 attrs: None = None, 

2531 recursive: bool = True, 

2532 string: None = None, 

2533 limit: Optional[int] = None, 

2534 **kwargs: _StrainableAttribute, 

2535 ) -> _SomeTags: 

2536 ... 

2537 

2538 @overload 

2539 def __call__( 

2540 self, 

2541 name: None, 

2542 attrs: _StrainableAttributes, 

2543 recursive: bool = True, 

2544 string: None = None, 

2545 limit: Optional[int] = None, 

2546 **kwargs: _StrainableAttribute, 

2547 ) -> _SomeTags: 

2548 ... 

2549 

2550 @overload 

2551 def __call__( 

2552 self, 

2553 name: _FindMethodName, 

2554 attrs: Optional[_StrainableAttributes] = None, 

2555 recursive: bool = True, 

2556 string: Optional[_StrainableString] = None, 

2557 limit: Optional[int] = None, 

2558 **kwargs: _StrainableAttribute, 

2559 ) -> _SomeTags: 

2560 ... 

2561 

2562 def __call__( 

2563 self, 

2564 name: _OptionalFindMethodName = None, 

2565 attrs: Optional[_StrainableAttributes] = None, 

2566 recursive: bool = True, 

2567 string: Optional[_StrainableString] = None, 

2568 limit: Optional[int] = None, 

2569 **kwargs: _StrainableAttribute, 

2570 ) -> Union[_SomeTags,_SomeNavigableStrings,_QueryResults]: 

2571 """Calling a Tag like a function is the same as calling its 

2572 find_all() method. 

2573 

2574 Eg. tag('a') returns a list of all the A tags found within this tag. 

2575 """ 

2576 return self._find_all(name, attrs, string, limit, self._generator_for_recursive(recursive), **kwargs) 

2577 

2578 def __getattr__(self, subtag: str) -> Optional[Tag]: 

2579 """Calling tag.subtag is the same as calling tag.find(name="subtag")""" 

2580 # print("Getattr %s.%s" % (self.__class__, tag)) 

2581 result: _AtMostOneElement 

2582 if len(subtag) > 3 and subtag.endswith("Tag"): 

2583 # BS3: soup.aTag -> "soup.find("a") 

2584 tag_name = subtag[:-3] 

2585 warnings.warn( 

2586 '.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' 

2587 % dict(name=tag_name), 

2588 DeprecationWarning, 

2589 stacklevel=2, 

2590 ) 

2591 result = self.find(tag_name) 

2592 # We special case contents to avoid recursion. 

2593 elif not subtag.startswith("__") and not subtag == "contents": 

2594 result = self.find(subtag) 

2595 else: 

2596 raise AttributeError( 

2597 "'%s' object has no attribute '%s'" % (self.__class__, subtag) 

2598 ) 

2599 return result 

2600 

2601 def __eq__(self, other: Any) -> bool: 

2602 """Returns true iff this Tag has the same name, the same attributes, 

2603 and the same contents (recursively) as `other`.""" 

2604 if self is other: 

2605 return True 

2606 if not isinstance(other, Tag): 

2607 return False 

2608 if ( 

2609 not hasattr(other, "name") 

2610 or not hasattr(other, "attrs") 

2611 or not hasattr(other, "contents") 

2612 or self.name != other.name 

2613 or self.attrs != other.attrs 

2614 or len(self) != len(other) 

2615 ): 

2616 return False 

2617 for i, my_child in enumerate(self.contents): 

2618 if my_child != other.contents[i]: 

2619 return False 

2620 return True 

2621 

2622 def __ne__(self, other: Any) -> bool: 

2623 """Returns true iff this Tag is not identical to `other`, 

2624 as defined in __eq__.""" 

2625 return not self == other 

2626 

2627 def __repr__(self) -> str: 

2628 """Renders this `Tag` as a string.""" 

2629 return self.decode() 

2630 

2631 __str__ = __unicode__ = __repr__ 

2632 

def encode(
    self,
    encoding: _Encoding = DEFAULT_OUTPUT_ENCODING,
    indent_level: Optional[int] = None,
    formatter: _FormatterOrName = "minimal",
    errors: str = "xmlcharrefreplace",
) -> bytes:
    """Produce a bytestring rendering of this `Tag` and its contents.

    :param encoding: The encoding of the resulting bytestring. The
        same value is handed to decode(), so it may also affect the
        text of the document, specifically any encoding declarations
        within the document.
    :param indent_level: How many levels each line of the rendering
        is indented by. (What a 'level' means, in terms of spaces or
        other characters output, is up to the ``formatter``.) This is
        used internally in recursive calls while pretty-printing.
    :param formatter: A `Formatter` object, or a string naming one of
        the standard formatters.
    :param errors: An error handling strategy such as
        'xmlcharrefreplace'. It is passed along into
        :py:meth:`str.encode`, so it should be one of the `error
        handling constants defined by Python's codecs module
        <https://docs.python.org/3/library/codecs.html#error-handlers>`_.
    """
    # Two steps: render the data structure as a Unicode string, then
    # encode that string.
    return self.decode(indent_level, encoding, formatter).encode(encoding, errors)

2662 

def decode(
    self,
    indent_level: Optional[int] = None,
    eventual_encoding: _Encoding = DEFAULT_OUTPUT_ENCODING,
    formatter: _FormatterOrName = "minimal",
    iterator: Optional[Iterator[PageElement]] = None,
) -> str:
    """Render this `Tag` and its contents as a Unicode string.

    The rendering is driven by the events from `_event_stream`, so
    nested tags are handled in a loop rather than by recursive calls.

    :param indent_level: Each line of the rendering will be
        indented this many levels. (The ``formatter`` decides what a
        'level' means, in terms of spaces or other characters
        output.) This is used internally in recursive calls while
        pretty-printing. None disables pretty-printing.
    :param eventual_encoding: The encoding you intend to use when
        converting the string to a bytestring. decode() is *not*
        responsible for performing that encoding. This information
        is needed so that a real encoding can be substituted in if
        the document contains an encoding declaration (e.g. in a
        <meta> tag).
    :param formatter: Either a `Formatter` object, or a string
        naming one of the standard formatters.
    :param iterator: The iterator to use when navigating over the
        parse tree. This is only used by `Tag.decode_contents` and
        you probably won't need to use it.
    :return: The rendered markup.
    """
    # Rendered fragments, joined into one string at the end.
    pieces = []
    # First off, turn a non-Formatter `formatter` into a Formatter
    # object. This will stop the lookup from happening over and
    # over again.
    if not isinstance(formatter, Formatter):
        formatter = self.formatter_for_name(formatter)

    # A literal True is accepted and treated as indentation level 0.
    if indent_level is True:
        indent_level = 0

    # The currently active tag that put us into string literal
    # mode. Until this element is closed, children will be treated
    # as string literals and not pretty-printed. String literal
    # mode is turned on immediately after this tag begins, and
    # turned off immediately before it's closed. This means there
    # will be whitespace before and after the tag itself.
    string_literal_tag = None

    for event, element in self._event_stream(iterator):
        if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
            element = cast(Tag, element)
            piece = element._format_tag(eventual_encoding, formatter, opening=True)
        elif event is Tag.END_ELEMENT_EVENT:
            element = cast(Tag, element)
            piece = element._format_tag(eventual_encoding, formatter, opening=False)
            # A closing tag is rendered one level shallower than the
            # children that preceded it.
            if indent_level is not None:
                indent_level -= 1
        else:
            element = cast(NavigableString, element)
            piece = element.output_ready(formatter)

        # Now we need to apply the 'prettiness' -- extra
        # whitespace before and/or after this tag. This can get
        # complicated because certain tags, like <pre> and
        # <script>, can't be prettified, since adding whitespace would
        # change the meaning of the content.

        # The default behavior is to add whitespace before and
        # after an element when string literal mode is off, and to
        # leave things as they are when string literal mode is on.
        if string_literal_tag:
            indent_before = indent_after = False
        else:
            indent_before = indent_after = True

        # The only time the behavior is more complex than that is
        # when we encounter an opening or closing tag that might
        # put us into or out of string literal mode.
        if (
            event is Tag.START_ELEMENT_EVENT
            and not string_literal_tag
            and not cast(Tag, element)._should_pretty_print()
        ):
            # We are about to enter string literal mode. Add
            # whitespace before this tag, but not after. We
            # will stay in string literal mode until this tag
            # is closed.
            indent_before = True
            indent_after = False
            string_literal_tag = element
        elif event is Tag.END_ELEMENT_EVENT and element is string_literal_tag:
            # We are about to exit string literal mode by closing
            # the tag that sent us into that mode. Add whitespace
            # after this tag, but not before.
            indent_before = False
            indent_after = True
            string_literal_tag = None

        # Now we know whether to add whitespace before and/or
        # after this element.
        if indent_level is not None:
            if indent_before or indent_after:
                if isinstance(element, NavigableString):
                    piece = piece.strip()
                # A piece that is empty (e.g. a string that was all
                # whitespace before stripping) gets no indentation
                # or newline of its own.
                if piece:
                    piece = self._indent_string(
                        piece, indent_level, formatter, indent_before, indent_after
                    )
            # Whatever follows an opening tag is rendered one level
            # deeper, until the matching END_ELEMENT_EVENT undoes this.
            if event == Tag.START_ELEMENT_EVENT:
                indent_level += 1
        pieces.append(piece)
    return "".join(pieces)

2771 

class _TreeTraversalEvent(object):
    """An internal class representing an event in the process
    of traversing a parse tree.

    The class defines no attributes or methods of its own; its
    instances are used purely as distinct marker objects.

    :meta private:
    """

# Stand-ins for the different events yielded by _event_stream. Each one
# is a separate _TreeTraversalEvent instance, so they can be told apart
# with `is`.
# Yielded when a Tag that is not an empty-element tag is encountered.
START_ELEMENT_EVENT = _TreeTraversalEvent()  #: :meta private:
# Yielded when a previously started Tag is closed.
END_ELEMENT_EVENT = _TreeTraversalEvent()  #: :meta private:
# Yielded for a Tag whose `is_empty_element` is true; no end event follows.
EMPTY_ELEMENT_EVENT = _TreeTraversalEvent()  #: :meta private:
# Yielded for any element that is not a Tag.
STRING_ELEMENT_EVENT = _TreeTraversalEvent()  #: :meta private:

2784 

def _event_stream(
    self, iterator: Optional[Iterator[PageElement]] = None
) -> Iterator[Tuple[_TreeTraversalEvent, PageElement]]:
    """Yield a sequence of events that can be used to reconstruct the DOM
    for this element.

    This lets us recreate the nested structure of this element
    (e.g. when formatting it as a string) without using recursive
    method calls.

    This is similar in concept to the SAX API, but it's a simpler
    interface designed for internal use. The events are different
    from SAX and the arguments associated with the events are Tags
    and other Beautiful Soup objects.

    :param iterator: An alternate iterator to use when traversing
        the tree. When omitted (or falsy), `self_and_descendants`
        is used.
    :return: A generator of ``(event, element)`` pairs, where `event`
        is one of the ``*_ELEMENT_EVENT`` markers defined on `Tag`.
    """
    # Tags that have been started but not yet closed, innermost last.
    tag_stack: List[Tag] = []

    iterator = iterator or self.self_and_descendants

    for c in iterator:
        # If the parent of the element we're about to yield is not
        # the tag currently on the stack, it means that the tag on
        # the stack closed before this element appeared.
        #
        # NOTE(review): `!=` goes through Tag.__ne__, i.e. a structural
        # comparison rather than an identity check -- confirm whether
        # `is not` was intended here.
        while tag_stack and c.parent != tag_stack[-1]:
            now_closed_tag = tag_stack.pop()
            yield Tag.END_ELEMENT_EVENT, now_closed_tag

        if isinstance(c, Tag):
            if c.is_empty_element:
                # Empty-element tags are never pushed on the stack,
                # so they never get an END_ELEMENT_EVENT.
                yield Tag.EMPTY_ELEMENT_EVENT, c
            else:
                yield Tag.START_ELEMENT_EVENT, c
                tag_stack.append(c)
                continue
        else:
            yield Tag.STRING_ELEMENT_EVENT, c

    # The iterator is exhausted; close whatever is still open,
    # innermost tag first.
    while tag_stack:
        now_closed_tag = tag_stack.pop()
        yield Tag.END_ELEMENT_EVENT, now_closed_tag

2828 

2829 def _indent_string( 

2830 self, 

2831 s: str, 

2832 indent_level: int, 

2833 formatter: Formatter, 

2834 indent_before: bool, 

2835 indent_after: bool, 

2836 ) -> str: 

2837 """Add indentation whitespace before and/or after a string. 

2838 

2839 :param s: The string to amend with whitespace. 

2840 :param indent_level: The indentation level; affects how much 

2841 whitespace goes before the string. 

2842 :param indent_before: Whether or not to add whitespace 

2843 before the string. 

2844 :param indent_after: Whether or not to add whitespace 

2845 (a newline) after the string. 

2846 """ 

2847 space_before = "" 

2848 if indent_before and indent_level: 

2849 space_before = formatter.indent * indent_level 

2850 

2851 space_after = "" 

2852 if indent_after: 

2853 space_after = "\n" 

2854 

2855 return space_before + s + space_after 

2856 

2857 def _format_tag( 

2858 self, eventual_encoding: str, formatter: Formatter, opening: bool 

2859 ) -> str: 

2860 if self.hidden: 

2861 # A hidden tag is invisible, although its contents 

2862 # are visible. 

2863 return "" 

2864 

2865 # A tag starts with the < character (see below). 

2866 

2867 # Then the / character, if this is a closing tag. 

2868 closing_slash = "" 

2869 if not opening: 

2870 closing_slash = "/" 

2871 

2872 # Then an optional namespace prefix. 

2873 prefix = "" 

2874 if self.prefix: 

2875 prefix = self.prefix + ":" 

2876 

2877 # Then a list of attribute values, if this is an opening tag. 

2878 attribute_string = "" 

2879 if opening: 

2880 attributes = formatter.attributes(self) 

2881 attrs = [] 

2882 for key, val in attributes: 

2883 if val is None: 

2884 decoded = key 

2885 else: 

2886 if isinstance(val, list) or isinstance(val, tuple): 

2887 val = " ".join(val) 

2888 elif not isinstance(val, str): 

2889 val = str(val) 

2890 elif ( 

2891 isinstance(val, AttributeValueWithCharsetSubstitution) 

2892 and eventual_encoding is not None 

2893 ): 

2894 val = val.substitute_encoding(eventual_encoding) 

2895 

2896 text = formatter.attribute_value(val) 

2897 decoded = str(key) + "=" + formatter.quoted_attribute_value(text) 

2898 attrs.append(decoded) 

2899 if attrs: 

2900 attribute_string = " " + " ".join(attrs) 

2901 

2902 # Then an optional closing slash (for a void element in an 

2903 # XML document). 

2904 void_element_closing_slash = "" 

2905 if self.is_empty_element: 

2906 void_element_closing_slash = formatter.void_element_close_prefix or "" 

2907 

2908 # Put it all together. 

2909 return ( 

2910 "<" 

2911 + closing_slash 

2912 + prefix 

2913 + self.name 

2914 + attribute_string 

2915 + void_element_closing_slash 

2916 + ">" 

2917 ) 

2918 

2919 def _should_pretty_print(self, indent_level: int = 1) -> bool: 

2920 """Should this tag be pretty-printed? 

2921 

2922 Most of them should, but some (such as <pre> in HTML 

2923 documents) should not. 

2924 """ 

2925 return indent_level is not None and ( 

2926 not self.preserve_whitespace_tags 

2927 or self.name not in self.preserve_whitespace_tags 

2928 ) 

2929 

@overload
def prettify(
    self,
    encoding: None = None,
    formatter: _FormatterOrName = "minimal",
) -> str:
    ...

@overload
def prettify(
    self,
    encoding: _Encoding,
    formatter: _FormatterOrName = "minimal",
) -> bytes:
    ...

def prettify(
    self,
    encoding: Optional[_Encoding] = None,
    formatter: _FormatterOrName = "minimal",
) -> Union[str, bytes]:
    """Pretty-print this `Tag`, starting at indentation level 0.

    :param encoding: The encoding of the bytestring to produce, or
        None if you want a Unicode string.
    :param formatter: A Formatter object, or a string naming one of
        the standard formatters.
    :return: A bytestring from `encode()` when an ``encoding`` is
        given, otherwise a string from `decode()`.
    """
    if encoding is not None:
        return self.encode(encoding=encoding, indent_level=0, formatter=formatter)
    return self.decode(indent_level=0, formatter=formatter)

2963 

def decode_contents(
    self,
    indent_level: Optional[int] = None,
    eventual_encoding: _Encoding = DEFAULT_OUTPUT_ENCODING,
    formatter: _FormatterOrName = "minimal",
) -> str:
    """Render what is inside this tag as a Unicode string.

    :param indent_level: Each line of the rendering will be
        indented this many levels. (The formatter decides what a
        'level' means in terms of spaces or other characters
        output.) Used internally in recursive calls while
        pretty-printing.

    :param eventual_encoding: The tag is destined to be
        encoded into this encoding. decode_contents() is *not*
        responsible for performing that encoding. This information
        is needed so that a real encoding can be substituted in if
        the document contains an encoding declaration (e.g. in a
        <meta> tag).

    :param formatter: A `Formatter` object, or a string naming one of
        the standard Formatters.

    :return: The rendered markup of this tag's descendants.
    """
    # Handing decode() the descendants iterator makes it walk only what
    # lies below this tag instead of its default traversal.
    below_this_tag = self.descendants
    return self.decode(
        indent_level, eventual_encoding, formatter, iterator=below_this_tag
    )

2991 

def encode_contents(
    self,
    indent_level: Optional[int] = None,
    encoding: _Encoding = DEFAULT_OUTPUT_ENCODING,
    formatter: _FormatterOrName = "minimal",
    errors: str = "strict",
) -> bytes:
    """Renders the contents of this PageElement as a bytestring.

    The contents are rendered to text by `decode_contents()` and the
    text is then encoded.

    :param indent_level: Each line of the rendering will be
        indented this many levels. (The ``formatter`` decides what a
        'level' means, in terms of spaces or other characters
        output.) This is used internally in recursive calls while
        pretty-printing.
    :param encoding: The bytestring will be in this encoding.
    :param formatter: Either a `Formatter` object, or a string naming one of
        the standard formatters.
    :param errors: An error handling strategy, passed along into
        :py:meth:`str.encode`. The default, 'strict', keeps the
        historical behavior of this method (a character that cannot be
        represented in ``encoding`` raises `UnicodeEncodeError`). Pass
        'xmlcharrefreplace' to get the same lenient behavior as
        `encode()`.
    :return: The encoded markup of this element's contents.
    :raises UnicodeEncodeError: If ``errors`` is 'strict' and the
        rendered text contains a character ``encoding`` cannot represent.
    """
    contents = self.decode_contents(indent_level, encoding, formatter)
    # Callers can now pick the error handler, as encode() already allows.
    return contents.encode(encoding, errors)

3011 

@_deprecated("encode_contents", "4.0.0")
def renderContents(
    self,
    encoding: _Encoding = DEFAULT_OUTPUT_ENCODING,
    prettyPrint: bool = False,
    indentLevel: Optional[int] = 0,
) -> bytes:
    """Deprecated method for BS3 compatibility; forwards to `encode_contents`.

    :param encoding: The encoding of the returned bytestring.
    :param prettyPrint: When false, ``indentLevel`` is ignored and no
        indentation level is passed on.
    :param indentLevel: The indentation level to pass on when
        ``prettyPrint`` is true.

    :meta private:
    """
    # Without pretty-printing there is no indentation level to forward.
    effective_indent = indentLevel if prettyPrint else None
    return self.encode_contents(indent_level=effective_indent, encoding=encoding)

3026 

3027 # Soup methods 

3028 # 

3029 

3030 # People who call these methods in a type-safe environment 

3031 # basically want to know whether the call is going to return 

3032 # NavigableStrings or Tags. It's always one or the other, never 

3033 # both, but spelling it out requires a number of overloads for 

3034 # each method. 

3035 # 

3036 # If I had it to do over again I'd design this API differently (it 

3037 # would look more like ElementFilter), but that's life. 

3038 # 

3039 # The overloads all look for a clue in the input which restricts 

3040 # the method to returning either only strings or only tags. Only 

3041 # the most common cases are covered. 

3042 

# e.g. find(string="foo")
#  -> string information but no tag information
#  -> string
@overload
def find(
    self,
    name: None = None,
    attrs: None = None,
    recursive: bool = True,
    *,
    string: _StrainableString,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneNavigableString:
    ...

# e.g. find() -> default behavior -> tag
#      find(attr="value") -> only tags have attrs -> tag
@overload
def find(
    self,
    name: None = None,
    attrs: None = None,
    recursive: bool = True,
    string: None = None,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneTag:
    ...

# e.g. find(attrs=dict(attr="value"))
#  -> only tags have attrs
#  -> tag
@overload
def find(
    self,
    name: None,
    attrs: _StrainableAttributes,
    recursive: bool = True,
    string: Optional[_StrainableString] = None,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneTag:
    ...

# e.g. find(name="a")) -> only tags have names -> tag
#
# The confusing and controversial case of find(name="a", string="foo")
# also hits this overload.
@overload
def find(
    self,
    name: _FindMethodName,
    attrs: Optional[_StrainableAttributes] = None,
    recursive: bool = True,
    string: Optional[_StrainableString] = None,
    **kwargs: _StrainableAttribute,
) -> _AtMostOneTag:
    ...

# Some lesser-used cases are not covered by the overloads. Those
# cases will hit this method directly and return a very general
# type which will need to be cast after the call.
def find(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    recursive: bool = True,
    string: Optional[_StrainableString] = None,
    **kwargs: _StrainableAttribute,
) -> Union[_AtMostOneTag,_AtMostOneNavigableString,_AtMostOneElement]:
    """Return the first `PageElement` below this one that matches the
    given criteria, or None if nothing matches.

    All find_* methods take a common set of arguments. See the online
    documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param recursive: If this is True, find() will perform a
        recursive search of this Tag's children. Otherwise,
        only the direct children will be considered.
    :param string: A filter on the `Tag.string` attribute.
    :kwargs: Additional filters on attribute values.
    :return: The first match, or None.
    """
    candidates = self._generator_for_recursive(recursive)
    # A limit of 1 asks the search for a single result only.
    matches = self._find_all(name, attrs, string, 1, candidates, **kwargs)
    return matches[0] if matches else None

3129 

# Legacy camelCase name, registered through _deprecated_function_alias
# as an alias for find() (marked with version "3.0.0").
findChild = _deprecated_function_alias("findChild", "find", "3.0.0")

3131 

# e.g. find_all(string="foo")
#  -> string information but no tag information
#  -> strings
#
# Also covers unlikely cases like find_all(name=None, string="foo")
#
# "To mark parameters as keyword-only, indicating the parameters
# must be passed by keyword argument, place an * in the arguments
# list just before the first keyword-only parameter."
#
# --https://peps.python.org/pep-0570/#keyword-only-arguments
@overload
def find_all(
    self,
    name: None = None,
    attrs: None = None,
    recursive: bool = True,
    *,
    string: _StrainableString,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeNavigableStrings:
    ...

# e.g. find_all() -> default behavior -> tags
#      find_all(attr="value") -> only tags have attrs -> tags
@overload
def find_all(
    self,
    name: None = None,
    attrs: None = None,
    recursive: bool = True,
    string: None = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeTags:
    ...

# e.g. find_all(attrs=dict(attr="value"))
#  -> only tags have attrs
#  -> tags
@overload
def find_all(
    self,
    name: None,
    attrs: _StrainableAttributes,
    recursive: bool = True,
    string: Optional[_StrainableString] = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeTags:
    ...

# e.g. find_all(name="a")) -> only tags have names -> tags
#
# The confusing and controversial case of find_all(name="a", string="foo")
# also hits this overload.
@overload
def find_all(
    self,
    name: _FindMethodName,
    attrs: Optional[_StrainableAttributes] = None,
    recursive: bool = True,
    string: Optional[_StrainableString] = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> _SomeTags:
    ...

# Without the clues above, we don't know whether the method will
# return strings or tags. However every common case will trigger one
# of the overloads and give us the clue we need.
def find_all(
    self,
    name: _OptionalFindMethodName = None,
    attrs: Optional[_StrainableAttributes] = None,
    recursive: bool = True,
    string: Optional[_StrainableString] = None,
    limit: Optional[int] = None,
    **kwargs: _StrainableAttribute,
) -> Union[_SomeTags,_SomeNavigableStrings]:
    """Collect every `PageElement` below this one that matches the
    given criteria.

    All find_* methods take a common set of arguments. See the online
    documentation for detailed explanations.

    :param name: A filter on tag name.
    :param attrs: Additional filters on attribute values.
    :param recursive: If this is True, find_all() will perform a
        recursive search of this PageElement's children. Otherwise,
        only the direct children will be considered.
    :param string: A filter on the `Tag.string` attribute.
    :param limit: Stop looking after finding this many results.
    :kwargs: Additional filters on attribute values.
    :return: The matches, typed as tags unless ``string`` is the only
        criterion given.
    """
    generator = self._generator_for_recursive(recursive)

    if string is None:
        # If string is None, we're searching for tags.
        return cast(ResultSet[Tag], self._find_all(
            name, attrs, None, limit, generator, **kwargs
        ))

    if name is None and attrs is None and not kwargs:
        # A string filter and nothing else: we're searching for strings.
        return cast(ResultSet[NavigableString], self._find_all(
            None, None, string, limit, generator, **kwargs
        ))

    # A string filter combined with tag criteria.
    # TODO: Using the @overload decorator to express the three ways you
    # could get into this path is way too much code for a rarely(?) used
    # feature.
    return cast(ResultSet[Tag],
                self._find_all(name, attrs, string, limit, generator,
                               **kwargs))

3247 

# Legacy camelCase names, registered through _deprecated_function_alias
# as aliases for find_all() (marked with versions "4.0.0" and "3.0.0").
findAll = _deprecated_function_alias("findAll", "find_all", "4.0.0")
findChildren = _deprecated_function_alias("findChildren", "find_all", "3.0.0")

3250 

3251 # Generator methods 

@property
def children(self) -> Iterator[PageElement]:
    """Iterate over all direct children of this `PageElement`.

    :return: A fresh generator over the items of ``self.contents``,
        in list order.
    """
    return (x for x in self.contents)

3256 

@property
def self_and_descendants(self) -> Iterator[PageElement]:
    """Iterate over this `Tag` followed by everything `descendants`
    yields, in that order.

    NOTE(review): this was described as a breadth-first sequence, but
    `descendants` walks the `next_element` chain, which looks like
    document (depth-first) order -- confirm.
    """
    return self._self_and(self.descendants)

3263 

@property
def descendants(self) -> Iterator[PageElement]:
    """Iterate over every element below this `Tag`, starting at its
    first child and following `next_element` links.

    Yields nothing when this tag has no contents.

    NOTE(review): this was described as a breadth-first sequence, but
    following `next_element` looks like document (depth-first) order
    -- confirm.
    """
    if not len(self.contents):
        return
    # _last_descendant() can't return None here because
    # accept_self is True. Worst case, last_descendant will end up
    # as self.
    last_descendant = cast(PageElement, self._last_descendant(accept_self=True))
    # The first element past this tag's subtree; reaching it ends the walk.
    stopNode = last_descendant.next_element
    current: _AtMostOneElement = self.contents[0]
    while current is not stopNode and current is not None:
        # Read the successor before yielding, so the walk continues from
        # the link that existed at this point even if the caller changes
        # `current` while the generator is suspended.
        successor = current.next_element
        yield current
        current = successor

3281 

3282 def _generator_for_recursive(self, recursive:bool) -> Iterator[PageElement]: 

3283 """Helper method to process the boolean `recursive` argument 

3284 for find* methods. 

3285 

3286 :return: the appropriate generator 

3287 """ 

3288 if recursive: 

3289 return self.descendants 

3290 return self.children 

3291 

3292 # CSS selector code 

def select_one(
    self, selector: str, namespaces: Optional[Dict[str, str]] = None, **kwargs: Any
) -> Optional[Tag]:
    """Run a CSS selector against the current element through the
    `css` interface and return what its ``select_one()`` returns.

    :param selector: A CSS selector.

    :param namespaces: A dictionary mapping namespace prefixes
       used in the CSS selector to namespace URIs. By default,
       Beautiful Soup will use the prefixes it encountered while
       parsing the document.

    :param kwargs: Keyword arguments to be passed into Soup Sieve's
       soupsieve.select() method.
    """
    css_api = self.css
    return css_api.select_one(selector, namespaces, **kwargs)

3309 

def select(
    self,
    selector: str,
    namespaces: Optional[Dict[str, str]] = None,
    limit: int = 0,
    **kwargs: Any,
) -> ResultSet[Tag]:
    """Run a CSS selector against the current element through the
    `css` interface and return what its ``select()`` returns.

    This uses the SoupSieve library.

    :param selector: A string containing a CSS selector.

    :param namespaces: A dictionary mapping namespace prefixes
       used in the CSS selector to namespace URIs. By default,
       Beautiful Soup will use the prefixes it encountered while
       parsing the document.

    :param limit: After finding this number of results, stop looking.

    :param kwargs: Keyword arguments to be passed into SoupSieve's
       soupsieve.select() method.
    """
    css_api = self.css
    return css_api.select(selector, namespaces, limit, **kwargs)

3334 

@property
def css(self) -> CSS:
    """Return an interface to the CSS selector API.

    A new `CSS` object wrapping this element is created on every
    access.
    """
    return CSS(self)

3339 

3340 # Old names for backwards compatibility 

@_deprecated("children", "4.0.0")
def childGenerator(self) -> Iterator[PageElement]:
    """Deprecated generator; returns the same iterator as `children`.

    :meta private:
    """
    return self.children

3348 

@_deprecated("descendants", "4.0.0")
def recursiveChildGenerator(self) -> Iterator[PageElement]:
    """Deprecated generator; returns the same iterator as `descendants`.

    :meta private:
    """
    return self.descendants

3356 

@_deprecated("has_attr", "4.0.0")
def has_key(self, key: str) -> bool:
    """Deprecated method; forwards to `has_attr`. This was kind of
    misleading because has_key() (attributes) was different from the
    ``in`` operator (contents).

    has_key() is gone in Python 3, anyway.

    :param key: The attribute name to look for.
    :return: Whatever ``self.has_attr(key)`` returns.

    :meta private:
    """
    return self.has_attr(key)

3367 

3368 

_PageElementT = TypeVar("_PageElementT", bound=PageElement)

class ResultSet(List[_PageElementT], Generic[_PageElementT]):
    """A list of `PageElement` objects holding search results: whatever
    matched when an :py:class:`ElementFilter` was run against a parse
    tree.
    """

    # The filter that produced these results, if any.
    source: Optional[ElementFilter]

    def __init__(
        self, source: Optional[ElementFilter], result: Iterable[_PageElementT] = ()
    ) -> None:
        """Build the list from `result` and remember `source`.

        :param source: The filter that produced the results.
        :param result: The matching elements.
        """
        super().__init__(result)
        self.source = source

    def __getattr__(self, key: str) -> None:
        """Raise a helpful exception to explain a common code fix.

        Only reached for attributes that normal lookup did not find.

        :param key: The name of the missing attribute.
        :raises AttributeError: Always.
        """
        message = (
            f'ResultSet object has no attribute "{key}". '
            "You're probably treating a list of elements like a single element. "
            "Did you call find_all() when you meant to call find()?"
        )
        raise AttributeError(message)

3390 

3391# Now that all the classes used by SoupStrainer have been defined, 

3392# import SoupStrainer itself into this module to preserve the 

3393# backwards compatibility of anyone who imports 

3394# bs4.element.SoupStrainer. 

3395from bs4.filter import SoupStrainer # noqa: E402