Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bs4/__init_

1"""Beautiful Soup Elixir and Tonic - "The Screen-Scraper's Friend".

3http://www.crummy.com/software/BeautifulSoup/

5Beautiful Soup uses a pluggable XML or HTML parser to parse a

6(possibly invalid) document into a tree representation. Beautiful Soup

7provides methods and Pythonic idioms that make it easy to navigate,

8search, and modify the parse tree.

10Beautiful Soup works with Python 3.6 and up. It works better if lxml

11and/or html5lib is installed.

13For more than you ever wanted to know about Beautiful Soup, see the

14documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/

15"""

17__author__ = "Leonard Richardson (leonardr@segfault.org)"

18__version__ = "4.12.2"

20# Use of this source code is governed by the MIT license.

21__license__ = "MIT"

23__all__ = ['BeautifulSoup']

25from collections import Counter

26import os

27import re

28import sys

29import traceback

30import warnings

# Before anything else, fail with a helpful message if this module is
# imported by a Python 2 interpreter.
if sys.version_info[0] < 3:
    raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.')

37from .builder import (

38 builder_registry,

39 ParserRejectedMarkup,

40 XMLParsedAsHTMLWarning,

41 HTMLParserTreeBuilder

42)

43from .dammit import UnicodeDammit

44from .element import (

45 CData,

46 Comment,

47 CSS,

48 DEFAULT_OUTPUT_ENCODING,

49 Declaration,

50 Doctype,

51 NavigableString,

52 PageElement,

53 ProcessingInstruction,

54 PYTHON_SPECIFIC_ENCODINGS,

55 ResultSet,

56 Script,

57 Stylesheet,

58 SoupStrainer,

59 Tag,

60 TemplateString,

61 )

63# Define some custom warnings.

class GuessedAtParserWarning(UserWarning):
    """Warning issued when BeautifulSoup had to choose a parser itself.

    This most likely means that no parser was named in the constructor
    call.
    """

class MarkupResemblesLocatorWarning(UserWarning):
    """Warning issued when the 'markup' handed to BeautifulSoup looks
    like a resource locator (a URL or a path to a file on disk) rather
    than an actual document.
    """

76class BeautifulSoup(Tag):

77 """A data structure representing a parsed HTML or XML document.

79 Most of the methods you'll call on a BeautifulSoup object are inherited from

80 PageElement or Tag.

82 Internally, this class defines the basic interface called by the

83 tree builders when converting an HTML/XML document into a data

84 structure. The interface abstracts away the differences between

85 parsers. To write a new tree builder, you'll need to understand

86 these methods as a whole.

88 These methods will be called by the BeautifulSoup constructor:

89 * reset()

90 * feed(markup)

92 The tree builder may call these methods from its feed() implementation:

93 * handle_starttag(name, attrs) # See note about return value

94 * handle_endtag(name)

95 * handle_data(data) # Appends to the current data node

96 * endData(containerClass) # Ends the current data node

98 No matter how complicated the underlying parser is, you should be

99 able to build a tree using 'start tag' events, 'end tag' events,

100 'data' events, and "done with data" events.

101

102 If you encounter an empty-element tag (aka a self-closing tag,

103 like HTML's <br> tag), call handle_starttag and then

104 handle_endtag.

105 """

106

107 # Since BeautifulSoup subclasses Tag, it's possible to treat it as

108 # a Tag with a .name. This name makes it clear the BeautifulSoup

109 # object isn't a real markup tag.

110 ROOT_TAG_NAME = '[document]'

111

112 # If the end-user gives no indication which tree builder they

113 # want, look for one with these features.

114 DEFAULT_BUILDER_FEATURES = ['html', 'fast']

115

116 # A string containing all ASCII whitespace characters, used in

117 # endData() to detect data chunks that seem 'empty'.

118 ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'

119

120 NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"

121

def __init__(self, markup="", features=None, builder=None,
             parse_only=None, from_encoding=None, exclude_encodings=None,
             element_classes=None, **kwargs):
    """Choose a tree builder and parse `markup` into this object.

    :param markup: A string or a file-like object representing
     markup to be parsed.

    :param features: Desirable features of the parser to be
     used: either the name of a specific parser ("lxml",
     "lxml-xml", "html.parser", or "html5lib") or the type of
     markup to be used ("html", "html5", "xml"). Naming a specific
     parser is recommended, so that results are the same across
     platforms and virtual environments.

    :param builder: A TreeBuilder subclass to instantiate (or an
     instance to use) instead of looking one up based on
     `features`. Only needed for a custom TreeBuilder.

    :param parse_only: A SoupStrainer. Only parts of the document
     matching the SoupStrainer will be considered.

    :param from_encoding: A string naming the encoding of the
     document to be parsed. It is ignored (with a warning) when
     `markup` is already a Unicode string.

    :param exclude_encodings: A list of strings naming encodings
     known to be wrong; it is handed to the builder's
     prepare_markup().

    :param element_classes: A dictionary mapping BeautifulSoup
     classes like Tag and NavigableString to other classes to be
     instantiated instead as the parse tree is built.

    :param kwargs: Certain Beautiful Soup 3 keyword arguments are
     accepted for backwards compatibility; they produce a warning
     and are otherwise ignored. Every remaining keyword argument
     is passed to the TreeBuilder constructor when this method
     instantiates the builder itself.

    :raise FeatureNotFound: If no tree builder offers the
     requested features.
    :raise ParserRejectedMarkup: If the builder rejects every
     candidate version of the markup.
    """
    # Beautiful Soup 3 arguments that no longer have any effect. Each
    # one is discarded, and the caller is told why it does nothing.
    obsolete_bs3_arguments = (
        ('convertEntities',
         "BS4 does not respect the convertEntities argument to the "
         "BeautifulSoup constructor. Entities are always converted "
         "to Unicode characters."),
        ('markupMassage',
         "BS4 does not respect the markupMassage argument to the "
         "BeautifulSoup constructor. The tree builder is responsible "
         "for any necessary markup massage."),
        ('smartQuotesTo',
         "BS4 does not respect the smartQuotesTo argument to the "
         "BeautifulSoup constructor. Smart quotes are always converted "
         "to Unicode characters."),
        ('selfClosingTags',
         "BS4 does not respect the selfClosingTags argument to the "
         "BeautifulSoup constructor. The tree builder is responsible "
         "for understanding self-closing tags."),
        ('isHTML',
         "BS4 does not respect the isHTML argument to the "
         "BeautifulSoup constructor. Suggest you use "
         "features='lxml' for HTML and features='lxml-xml' for "
         "XML."),
    )
    for obsolete_name, explanation in obsolete_bs3_arguments:
        if obsolete_name in kwargs:
            del kwargs[obsolete_name]
            warnings.warn(explanation)

    def take_renamed_argument(old_name, new_name):
        # Remove a renamed keyword argument from kwargs and return its
        # value. stacklevel=3 attributes the warning to whoever called
        # the constructor (this helper -> __init__ -> caller).
        if old_name not in kwargs:
            return None
        warnings.warn(
            'The "%s" argument to the BeautifulSoup constructor '
            'has been renamed to "%s."' % (old_name, new_name),
            DeprecationWarning, stacklevel=3
        )
        return kwargs.pop(old_name)

    parse_only = parse_only or take_renamed_argument(
        "parseOnlyThese", "parse_only")
    from_encoding = from_encoding or take_renamed_argument(
        "fromEncoding", "from_encoding")

    if from_encoding and isinstance(markup, str):
        warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
        from_encoding = None

    self.element_classes = element_classes or {}

    # Remember what the caller actually asked for, so we can tell
    # later whether the parser choice was explicit enough to skip the
    # 'you need to specify a parser' warning.
    original_builder = builder
    original_features = features

    if isinstance(builder, type):
        # A builder class was passed in; it is instantiated below.
        builder_class = builder
        builder = None
    elif builder is None:
        if isinstance(features, str):
            features = [features]
        if features is None or len(features) == 0:
            features = self.DEFAULT_BUILDER_FEATURES
        builder_class = builder_registry.lookup(*features)
        if builder_class is None:
            raise FeatureNotFound(
                "Couldn't find a tree builder with the features you "
                "requested: %s. Do you need to install a parser library?"
                % ",".join(features))

    # From here on, either `builder` is a TreeBuilder instance or
    # `builder_class` can be instantiated with the remaining kwargs.
    if builder is None:
        builder = builder_class(**kwargs)
        had_to_guess = not original_builder and not (
            original_features == builder.NAME
            or original_features in builder.ALTERNATE_NAMES
        )
        if had_to_guess and markup:
            # The caller never said which TreeBuilder to use, so the
            # choice was a guess. Issue a warning.
            markup_type = "XML" if builder.is_xml else "HTML"

            # Adapted from warnings.py so that the reported line is the
            # same one warnings.warn() will pick, even if the answer is
            # wrong (as it may be in a multithreading situation).
            try:
                caller = sys._getframe(1)
            except ValueError:
                caller = None
            if caller:
                caller_globals = caller.f_globals
                line_number = caller.f_lineno
            else:
                caller_globals = sys.__dict__
                line_number = 1
            filename = caller_globals.get('__file__')
            if filename and filename.lower().endswith((".pyc", ".pyo")):
                # Report the source file, not the compiled one.
                filename = filename[:-1]
            if filename:
                # With no filename at all the user is most likely in a
                # REPL, where the warning is not necessary.
                warnings.warn(
                    self.NO_PARSER_SPECIFIED_WARNING % dict(
                        filename=filename,
                        line_number=line_number,
                        parser=builder.NAME,
                        markup_type=markup_type
                    ),
                    GuessedAtParserWarning, stacklevel=2
                )
    elif kwargs:
        warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.")

    self.builder = builder
    self.is_xml = builder.is_xml
    self.known_xml = self.is_xml
    self._namespaces = {}
    self.parse_only = parse_only

    if hasattr(markup, 'read'):
        # It's a file-type object.
        markup = markup.read()
    elif len(markup) <= 256 and (
        (isinstance(markup, bytes) and b'<' not in markup)
        or (isinstance(markup, str) and '<' not in markup)
    ):
        # Warn about a couple of beginner problems that involve
        # passing non-markup to Beautiful Soup. The input is still
        # parsed as markup, since that is sometimes what was intended.
        if not self._markup_is_url(markup):
            self._markup_resembles_filename(markup)

    # Try each candidate the builder prepares until one parses.
    rejections = []
    for candidate in self.builder.prepare_markup(
            markup, from_encoding, exclude_encodings=exclude_encodings):
        (self.markup, self.original_encoding, self.declared_html_encoding,
         self.contains_replacement_characters) = candidate
        self.reset()
        self.builder.initialize_soup(self)
        try:
            self._feed()
        except ParserRejectedMarkup as rejection:
            rejections.append(rejection)
        else:
            break
    else:
        # Every candidate was rejected (or there were none at all).
        other_exceptions = [str(e) for e in rejections]
        raise ParserRejectedMarkup(
            "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
        )

    # Clear out the markup and remove the builder's circular
    # reference to this object.
    self.markup = None
    self.builder.soup = None

352

def _clone(self):
    """Return an empty object of the same class that reuses this
    object's TreeBuilder and has parsed no markup.

    This is the first step of the deepcopy process.
    """
    blank_copy = type(self)("", None, self.builder)

    # The copy will never see the original document, so carry its
    # detected encoding over by hand.
    blank_copy.original_encoding = self.original_encoding
    return blank_copy

365

def __getstate__(self):
    """Build the dictionary that represents this object when pickled.

    The parse tree is stored as its decoded markup string rather than
    as objects.
    """
    state = dict(self.__dict__)

    # Frequently a tree builder can't be pickled; in that case keep
    # only its class.
    if state.get('builder') is not None and not self.builder.picklable:
        state['builder'] = type(self.builder)

    # Store the contents as a Unicode string.
    state['contents'] = []
    state['markup'] = self.decode()

    # _most_recent_element, when present, is a Tag object left over
    # from the initial parse. It might not be picklable and it isn't
    # needed.
    state.pop('_most_recent_element', None)
    return state

381

def __setstate__(self, state):
    """Restore this object from a pickled `state` dictionary and
    re-parse the markup stored in it.
    """
    self.__dict__ = state
    stored_builder = self.builder
    if isinstance(stored_builder, type):
        # Only the builder's class was pickled; make a new instance.
        self.builder = stored_builder()
    elif not stored_builder:
        # We don't know which builder was used to build this parse
        # tree, so use a default we know is always available.
        self.builder = HTMLParserTreeBuilder()
    self.builder.soup = self
    self.reset()
    self._feed()
    return state

395

396

@classmethod
def _decode_markup(cls, markup):
    """Return `markup` as text so it's safe to send into warnings.warn.

    Bytestrings are decoded as UTF-8, with undecodable bytes replaced;
    any other value is returned untouched.

    TODO: warnings.warn had this problem back in 2010 but it might not
    anymore.
    """
    if not isinstance(markup, bytes):
        return markup
    return markup.decode('utf-8', 'replace')

409

@classmethod
def _markup_is_url(cls, markup):
    """Issue a warning if the incoming markup looks like a URL.

    :param markup: A bytestring or string; any other type is never
        treated as a URL.
    :return: Whether or not the markup resembles a URL
        closely enough to justify a warning.
    """
    if isinstance(markup, bytes):
        space, url_schemes = b' ', (b"http:", b"https:")
    elif isinstance(markup, str):
        space, url_schemes = ' ', ("http:", "https:")
    else:
        return False

    # Only input that starts with a scheme and contains no space at
    # all is reported.
    if not markup.startswith(url_schemes) or space in markup:
        return False

    warnings.warn(
        'The input looks more like a URL than markup. You may want to use'
        ' an HTTP client like requests to get the document behind'
        ' the URL, and feed that document to Beautiful Soup.',
        MarkupResemblesLocatorWarning,
        stacklevel=3
    )
    return True

439

@classmethod
def _markup_resembles_filename(cls, markup):
    """Issue a warning if the incoming markup resembles a filename.

    :param markup: A bytestring or string.
    :return: Whether or not the markup resembles a filename
        closely enough to justify a warning.
    """
    separators = '/\\'
    suffixes = ['.html', '.htm', '.xml', '.xhtml', '.txt']
    if isinstance(markup, bytes):
        # Compare bytes against bytes.
        separators = separators.encode("utf8")
        suffixes = [suffix.encode('utf8') for suffix in suffixes]

    # A path separator anywhere, or a familiar file extension at the
    # end, is enough to make the input look like a filename.
    has_separator = any(sep in markup for sep in separators)
    if not has_separator and not markup.lower().endswith(tuple(suffixes)):
        return False

    warnings.warn(
        'The input looks more like a filename than markup. You may'
        ' want to open this file and pass the filehandle into'
        ' Beautiful Soup.',
        MarkupResemblesLocatorWarning, stacklevel=3
    )
    return True

470

def _feed(self):
    """Internal method that parses previously set markup, creating a
    large number of Tag and NavigableString objects.
    """
    tree_builder = self.builder
    tree_builder.reset()
    tree_builder.feed(self.markup)

    # Flush any pending string, then close every tag that is still
    # open, leaving only the root object on the stack.
    self.endData()
    while self.currentTag.name != self.ROOT_TAG_NAME:
        self.popTag()

483

def reset(self):
    """Return this object to the state it would have if it had never
    parsed any markup.
    """
    # Re-run Tag's initializer with this object as its own parser, so
    # it becomes a fresh, hidden root tag.
    Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
    self.hidden = 1
    self.builder.reset()

    # Parser bookkeeping: pending text, the open-tag stacks and
    # counters, and the element parsed most recently.
    self.current_data = []
    self.currentTag = None
    self._most_recent_element = None
    self.open_tag_counter = Counter()
    self.tagStack = []
    self.preserve_whitespace_tag_stack = []
    self.string_container_stack = []

    # The root object is always the bottom of the tag stack.
    self.pushTag(self)

499

def new_tag(self, name, namespace=None, nsprefix=None, attrs=None,
            sourceline=None, sourcepos=None, **kwattrs):
    """Create a new Tag associated with this BeautifulSoup object.

    :param name: The name of the new Tag.
    :param namespace: The URI of the new Tag's XML namespace, if any.
    :param nsprefix: The prefix for the new Tag's XML namespace, if any.
    :param attrs: A dictionary of this Tag's attribute values; can
        be used instead of `kwattrs` for attributes like 'class'
        that are reserved words in Python. Entries in `attrs` take
        precedence over same-named entries in `kwattrs`.
    :param sourceline: The line number where this tag was
        (purportedly) found in its source document.
    :param sourcepos: The character position within `sourceline` where this
        tag was (purportedly) found.
    :param kwattrs: Keyword arguments for the new Tag's attribute values.
    :return: An instance of Tag, or of whatever class
        `element_classes` substitutes for Tag.
    """
    # `attrs` defaults to None rather than to a shared mutable {}.
    if attrs is not None:
        kwattrs.update(attrs)
    tag_class = self.element_classes.get(Tag, Tag)
    return tag_class(
        None, self.builder, name, namespace, nsprefix, kwattrs,
        sourceline=sourceline, sourcepos=sourcepos
    )

522

def string_container(self, base_class=None):
    """Work out which class should hold a newly parsed string.

    :param base_class: The class the caller would like to use;
        NavigableString when not given.
    :return: The class to instantiate, after the `element_classes`
        override and any container registered by the builder for the
        innermost open string-container tag have been applied.
    """
    requested = base_class or NavigableString

    # There may be a general override of the requested class.
    container = self.element_classes.get(requested, requested)

    # On top of that, we may be inside a tag that needs a special
    # container class.
    if self.string_container_stack and container is NavigableString:
        enclosing_tag = self.string_container_stack[-1]
        container = self.builder.string_containers.get(
            enclosing_tag.name, container
        )
    return container

538

def new_string(self, s, subclass=None):
    """Create a new NavigableString associated with this BeautifulSoup
    object.

    :param s: The value handed to the container class.
    :param subclass: Passed to string_container() to choose the
        container class.
    """
    return self.string_container(subclass)(s)

545

def insert_before(self, *args):
    """Always fails: this method is part of the PageElement API, but
    `BeautifulSoup` doesn't implement it because there is nothing
    before or after it in the parse tree.

    :raise NotImplementedError: On every call.
    """
    message = "BeautifulSoup objects don't support insert_before()."
    raise NotImplementedError(message)

551

def insert_after(self, *args):
    """Always fails: this method is part of the PageElement API, but
    `BeautifulSoup` doesn't implement it because there is nothing
    before or after it in the parse tree.

    :raise NotImplementedError: On every call.
    """
    message = "BeautifulSoup objects don't support insert_after()."
    raise NotImplementedError(message)

557

def popTag(self):
    """Internal method called by _popToTag when a tag is closed.

    :return: The tag that is current once the top of the stack has
        been removed.
    """
    closed = self.tagStack.pop()
    if closed.name in self.open_tag_counter:
        self.open_tag_counter[closed.name] -= 1

    # The closed tag may also be the innermost entry of either
    # special-purpose stack; if so it leaves that stack too.
    for stack in (self.preserve_whitespace_tag_stack,
                  self.string_container_stack):
        if stack and closed == stack[-1]:
            stack.pop()

    if self.tagStack:
        self.currentTag = self.tagStack[-1]
    return self.currentTag

571

def pushTag(self, tag):
    """Internal method called by handle_starttag when a tag is opened.

    :param tag: The tag that becomes the new current tag.
    """
    parent = self.currentTag
    if parent is not None:
        parent.contents.append(tag)
    self.tagStack.append(tag)
    self.currentTag = self.tagStack[-1]

    tag_name = tag.name
    # The root object is never counted as an open tag.
    if tag_name != self.ROOT_TAG_NAME:
        self.open_tag_counter[tag_name] += 1
    if tag_name in self.builder.preserve_whitespace_tags:
        self.preserve_whitespace_tag_stack.append(tag)
    if tag_name in self.builder.string_containers:
        self.string_container_stack.append(tag)

585

def endData(self, containerClass=None):
    """Method called by the TreeBuilder when the end of a data segment
    occurs.

    :param containerClass: Passed to string_container() to choose the
        class that will hold the finished string.
    """
    if not self.current_data:
        return

    text = ''.join(self.current_data)

    # If whitespace is not preserved, and this string contains
    # nothing but ASCII spaces, replace it with a single space
    # or newline.
    if not self.preserve_whitespace_tag_stack and all(
            character in self.ASCII_SPACES for character in text):
        text = '\n' if '\n' in text else ' '

    # Reset the data collector.
    self.current_data = []

    # Should we add this string to the tree at all? With a
    # SoupStrainer active, a top-level string is kept only when the
    # strainer is looking at text and this text matches.
    strainer = self.parse_only
    if strainer and len(self.tagStack) <= 1 and (
            not strainer.text or not strainer.search(text)):
        return

    string_class = self.string_container(containerClass)
    self.object_was_parsed(string_class(text))

619

def object_was_parsed(self, o, parent=None, most_recent_element=None):
    """Method called by the TreeBuilder to integrate an object into the
    parse tree.

    :param o: The object to add.
    :param parent: The tag that receives `o`; the current tag when
        not given.
    :param most_recent_element: The element to record as coming
        right before `o`; the most recently parsed element when not
        given.
    """
    if parent is None:
        parent = self.currentTag

    previous_element = (
        most_recent_element if most_recent_element is not None
        else self._most_recent_element
    )

    if isinstance(o, Tag):
        # A Tag may arrive with links of its own; keep them.
        next_element = o.next_element
        next_sibling = o.next_sibling
        previous_sibling = o.previous_sibling
        if previous_element is None:
            previous_element = o.previous_element
    else:
        next_element = next_sibling = previous_sibling = None

    # Decide this before `o` is attached: a parent that already has a
    # next_element needs its linkage repaired afterwards.
    needs_linkage_fix = parent.next_element is not None

    o.setup(parent, previous_element, next_element,
            previous_sibling, next_sibling)

    self._most_recent_element = o
    parent.contents.append(o)

    # Check if we are inserting into an already parsed node.
    if needs_linkage_fix:
        self._linkage_fixer(parent)

647

def _linkage_fixer(self, el):
    """Make sure linkage of this fragment is sound.

    Repairs the next/previous element and sibling links around the
    last child of `el`, which has just been appended.

    :param el: The tag whose contents just received a new last child.
    """
    first = el.contents[0]
    child = el.contents[-1]
    descendant = child

    if child is first and el.parent is not None:
        # Parent should be linked to first child
        el.next_element = child
        # We are no longer linked to whatever this element is
        prev_el = child.previous_element
        if prev_el is not None and prev_el is not el:
            prev_el.next_element = None
        # First child should be linked to the parent, and no previous siblings.
        child.previous_element = el
        child.previous_sibling = None

    # We have no sibling as we've been appended as the last.
    child.next_sibling = None

    # This index is a tag, dig deeper for a "last descendant"
    if isinstance(child, Tag) and child.contents:
        descendant = child._last_descendant(False)

    # As the final step, link last descendant. It should be linked
    # to the parent's next sibling (if found), else walk up the chain
    # and find a parent with a sibling. It should have no next sibling.
    descendant.next_element = None
    descendant.next_sibling = None
    target = el
    while target is not None:
        following = target.next_sibling
        if following is not None:
            descendant.next_element = following
            # Keep the chain symmetric: the element right before
            # `following` is the last descendant, which is `child`
            # itself only when `child` has no descendants.
            following.previous_element = descendant
            break
        target = target.parent

687

def _popToTag(self, name, nsprefix=None, inclusivePop=True):
    """Pops the tag stack up to and including the most recent
    instance of the given tag.

    If there are no open tags with the given name, nothing will be
    popped.

    :param name: Pop up to the most recent tag with this name.
    :param nsprefix: The namespace prefix that goes with `name`.
    :param inclusivePop: If this is false, pops the tag stack up
      to but *not* including the most recent instance of the
      given tag.
    :return: The value returned by the last popTag() call, or None
      if nothing was popped.
    """
    if name == self.ROOT_TAG_NAME:
        # The BeautifulSoup object itself can never be popped.
        return

    most_recently_popped = None

    # Walk down from the top of the stack. Index 0 (the root object)
    # is never examined, and the walk stops as soon as no tag with
    # this name remains open.
    position = len(self.tagStack) - 1
    while position > 0 and self.open_tag_counter.get(name):
        candidate = self.tagStack[position]
        position -= 1
        is_target = name == candidate.name and nsprefix == candidate.prefix
        if is_target and not inclusivePop:
            break
        most_recently_popped = self.popTag()
        if is_target:
            break

    return most_recently_popped

721

def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None,
                    sourcepos=None, namespaces=None):
    """Called by the tree builder when a new tag is encountered.

    :param name: Name of the tag.
    :param namespace: Passed through to the new tag's constructor.
    :param nsprefix: Namespace prefix for the tag.
    :param attrs: A dictionary of attribute values.
    :param sourceline: The line number where this tag was found in its
        source document.
    :param sourcepos: The character position within `sourceline` where this
        tag was found.
    :param namespaces: A dictionary of all namespace prefix mappings
        currently in scope in the document.

    If this method returns None, the tag was rejected by an active
    SoupStrainer. You should proceed as if the tag had not occurred
    in the document. For instance, if this was a self-closing tag,
    don't call handle_endtag.
    """
    # Any text collected so far belongs before this tag.
    self.endData()

    # With a SoupStrainer active, a top-level tag is kept only when
    # the strainer is not text-only and the tag matches it.
    strainer = self.parse_only
    if strainer and len(self.tagStack) <= 1 and (
            strainer.text or not strainer.search_tag(name, attrs)):
        return None

    tag_class = self.element_classes.get(Tag, Tag)
    tag = tag_class(
        self, self.builder, name, namespace, nsprefix, attrs,
        self.currentTag, self._most_recent_element,
        sourceline=sourceline, sourcepos=sourcepos,
        namespaces=namespaces
    )
    if tag is None:
        return tag

    # Append the new tag to the chain of parsed elements.
    predecessor = self._most_recent_element
    if predecessor is not None:
        predecessor.next_element = tag
    self._most_recent_element = tag
    self.pushTag(tag)
    return tag

762

def handle_endtag(self, name, nsprefix=None):
    """Called by the tree builder when an ending tag is encountered.

    :param name: Name of the tag.
    :param nsprefix: Namespace prefix for the tag.
    """
    # Finish the pending string first, then close the tag (and
    # anything still open inside it).
    self.endData()
    self._popToTag(name, nsprefix)

772

def handle_data(self, data):
    """Called by the tree builder when a chunk of textual data is
    encountered; the chunk is buffered until endData() runs.
    """
    pending_chunks = self.current_data
    pending_chunks.append(data)

776

def decode(self, pretty_print=False,
           eventual_encoding=DEFAULT_OUTPUT_ENCODING,
           formatter="minimal", iterator=None):
    """Returns a string or Unicode representation of the parse tree
    as an HTML or XML document.

    :param pretty_print: If this is True, indentation will be used to
        make the document more readable.
    :param eventual_encoding: The encoding of the final document.
        If this is None, the document will be a Unicode string.
        For an XML document it is also named in the XML declaration,
        unless it is one of PYTHON_SPECIFIC_ENCODINGS.
    :param formatter: Passed through unchanged to the inherited
        decode() implementation.
    :param iterator: Passed through unchanged to the inherited
        decode() implementation.
    :return: The document, preceded by an XML declaration when this
        object is an XML document.
    """
    if self.is_xml:
        # Print the XML declaration
        if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS:
            # This is a special Python encoding; it can't actually
            # go into an XML document because it means nothing
            # outside of Python.
            eventual_encoding = None
        encoding_part = ''
        if eventual_encoding is not None:
            encoding_part = ' encoding="%s"' % eventual_encoding
        prefix = '<?xml version="1.0"%s?>\n' % encoding_part
    else:
        prefix = ''

    # The inherited decode() takes an indent level, not a flag.
    indent_level = 0 if pretty_print else None
    return prefix + super(BeautifulSoup, self).decode(
        indent_level, eventual_encoding, formatter, iterator)

807

# Short aliases that make it easier to get started quickly,
# e.g. 'from bs4 import _soup'
_s = _soup = BeautifulSoup

811

class BeautifulStoneSoup(BeautifulSoup):
    """Deprecated interface to an XML parser."""

    def __init__(self, *args, **kwargs):
        """Warn about the deprecation, then build a BeautifulSoup object
        with `features` forced to 'xml'.
        """
        kwargs.update(features='xml')
        warnings.warn(
            'The BeautifulStoneSoup class is deprecated. Instead of using '
            'it, pass features="xml" into the BeautifulSoup constructor.',
            DeprecationWarning, stacklevel=2
        )
        super().__init__(*args, **kwargs)

823

824

class StopParsing(Exception):
    """Raised by a TreeBuilder that is unable to continue parsing."""

828

class FeatureNotFound(ValueError):
    """Raised by the BeautifulSoup constructor when no parser with the
    requested features is found.
    """

834

835

# When this file is run as a script it acts as an HTML pretty-printer:
# the document is read from standard input and written to standard output.
if __name__ == '__main__':
    soup = BeautifulSoup(sys.stdin)
    print(soup.prettify())

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bs4/init.py: 16%

354 statements